From 6ae84d980c8459888f767b5f5a8228758f1639f7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 09:24:18 +0200 Subject: [PATCH 01/48] [jtmk2] gg_tt.mad cudacpp.mk cleanup: rename 'runTest.exe' executable as 'runTest_cpp.exe' or 'runTest_cuda.exe' --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index f047530438..cb3e17bac5 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -617,7 +617,11 @@ else fgpu_main= endif -testmain=$(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + testmain=$(BUILDDIR)/runTest_cpp.exe +else + testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) From a89836ed8fc60bafd171853ca8f3483e277be133 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 09:41:55 +0200 Subject: [PATCH 02/48] [jtmk2] gg_tt.mad cudacpp.mk cleanup: replace '_main' variables by '_checkmain' for both cxx and gpu --- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index cb3e17bac5..cd4f87977b 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -606,15 +606,13 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check.exe 
+ cxx_fcheckmain=$(BUILDDIR)/fcheck.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/gcheck.exe + gpu_fcheckmain=$(BUILDDIR)/fgcheck.exe endif ifeq ($(GPUCC),) @@ -628,9 +626,9 @@ endif # First target (default goal) ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(testmain)) endif # Target (and build options): debug @@ -760,19 +758,19 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with 
GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o +$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif @@ -793,10 +791,10 @@ $(BUILDDIR)/%.o : %.f *.inc ###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else @@ -805,14 +803,14 @@ endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o 
$(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else From fedf7ed7fb0c2b527654e3802321c0505154e808 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 09:52:27 +0200 Subject: [PATCH 03/48] [jtmk2] gg_tt.mad cudacpp.mk cleanup: split 'testmain' variable into 'cxx_testmain' and 'gpu_testmain' --- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 85 ++++++++++--------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index cd4f87977b..0c1fc9c1cb 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -610,15 +610,11 @@ override RUNTIME = ifeq ($(GPUCC),) cxx_checkmain=$(BUILDDIR)/check.exe cxx_fcheckmain=$(BUILDDIR)/fcheck.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else gpu_checkmain=$(BUILDDIR)/gcheck.exe gpu_fcheckmain=$(BUILDDIR)/fgcheck.exe -endif - -ifeq ($(GPUCC),) - testmain=$(BUILDDIR)/runTest_cpp.exe -else - testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) @@ -626,9 +622,9 @@ endif # First target (default goal) ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so 
$(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) endif # Target (and build options): debug @@ -825,80 +821,91 @@ ifeq ($(GPUCC),) $(BUILDDIR)/testxxx.o: $(GTESTLIBS) $(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(cxx_testmain): $(BUILDDIR)/testxxx.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) $(BUILDDIR)/testmisc.o: $(GTESTLIBS) $(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(cxx_testmain): $(BUILDDIR)/testmisc.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o 
+$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) $(BUILDDIR)/runTest.o: $(GTESTLIBS) $(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(cxx_testmain): $(BUILDDIR)/runTest.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # 
adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else From a3898656bf8555f3c94e56b967dfe355eef8904d Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: 
Wed, 15 May 2024 09:55:39 +0200 Subject: [PATCH 04/48] [jtmk2] gg_tt.mad cudacpp.mk cleanup: rename '[f]check.exe' as '[f]check_cpp.exe' and '[f]gcheck.exe' as '[f]check_cuda.exe' --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 0c1fc9c1cb..c24d6fad25 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -608,12 +608,12 @@ override RUNTIME = ifeq ($(GPUCC),) - cxx_checkmain=$(BUILDDIR)/check.exe - cxx_fcheckmain=$(BUILDDIR)/fcheck.exe + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_checkmain=$(BUILDDIR)/gcheck.exe - gpu_fcheckmain=$(BUILDDIR)/fgcheck.exe + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif From d63be5707ff34e3b824a704baedc4c0652c7195e Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 10:02:52 +0200 Subject: [PATCH 05/48] [jtmk2] gg_tt.mad cudacpp.mk bug fix: replace gCrossSectionKernels.o by CrossSectionKernels_$(GPUSUFFIX).o --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index c24d6fad25..2baef9e3c8 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -646,7 +646,7 @@ $(BUILDDIR)/.build.$(TAG): ifeq ($(shell $(CXX) --version | grep ^nvc++),) $(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) $(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) 
-fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif # Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) @@ -679,7 +679,7 @@ endif ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) ###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif From bd6c4876696efd9bdd75fb8b8edc2c9e3d53d487 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 10:55:15 +0200 Subject: [PATCH 06/48] [jtmk2] gg_tt.mad cudacpp.mk cleanup: fix runTest, runCheck, runFcheck, cmpFcheck targets after changing executable names --- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 61 +++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 2baef9e3c8..053f796a00 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -1060,51 +1060,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) +# Target: check (execute runTest.exe, and compare check.exe with fcheck.exe) # [NB THIS IS WHAT IS USED IN THE GITHUB CI!] 
# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else check: runTest cmpFcheck -endif -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of 
events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- From bdcd2eaede977c7cbe07cb16abcff364ada2c6d0 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 11:00:48 +0200 Subject: [PATCH 07/48] [jtmk2] bug fix in CODEGEN cudacpp.mk: replace gCrossSectionKernels.o by CrossSectionKernels_$(GPUSUFFIX).o --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 8952caa3f0..ed1f2a1bef 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ 
-648,7 +648,7 @@ $(BUILDDIR)/.build.$(TAG): ifeq ($(shell $(CXX) --version | grep ^nvc++),) $(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) $(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif # Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) @@ -681,7 +681,7 @@ endif ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) ###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif From 57358242bc5dc61ede249853abb2ef38c4e4def3 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 11:13:03 +0200 Subject: [PATCH 08/48] [jtmk2] gg_tt.mad cudacpp.mk cleanup: rename fcheck_sa.o as fcheck_sa_fortran.o --- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 053f796a00..d5dc3973da 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -773,28 +773,28 @@ endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. 
-c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. -I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) $(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif $(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) @@ -806,11 +806,11 @@ ifeq ($(UNAME_S),Darwin) $(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif $(gpu_fcheckmain): LIBFLAGS 
+= $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif From 8156fb535f31e652495a30131dadee4b5dadd80f Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 11:21:24 +0200 Subject: [PATCH 09/48] [jtmk2] gg_tt.mad cudacpp.mk cleanup: rename all xxx.o compiled via C++ as xxx_cpp.o --- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 78 +++++++++---------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index d5dc3973da..9e077d248d 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -641,30 +641,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for 
other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math $(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS 
+= $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -677,21 +677,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) ###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -716,8 +716,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -726,8 +726,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -755,8 +755,8 @@ endif # Target (and build rules): C++ and CUDA/HIP standalone executables $(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_checkmain): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -790,11 +790,11 @@ ifeq ($(UNAME_S),Darwin) $(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif $(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) @@ -818,11 +818,11 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) 
-$(BUILDDIR)/testxxx.o: $(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(cxx_testmain): $(BUILDDIR)/testxxx.o -$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) @@ -833,10 +833,10 @@ endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(cxx_testmain): $(BUILDDIR)/testmisc.o -$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) @@ -846,10 +846,10 @@ endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(cxx_testmain): $(BUILDDIR)/runTest.o -$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: 
INCFLAGS += $(GTESTINC) @@ -899,11 +899,11 @@ endif ###$(gpu_testmain): LIBFLAGS += -lstdc++fs ###endif -ifeq ($(GPUCC),) # link only runTest.o +ifeq ($(GPUCC),) # link only runTest_cpp.o $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 From cb608bd1299f4e03b54645e74a186f8696ececc3 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 11:26:14 +0200 Subject: [PATCH 10/48] [jtmk2] gg_tt.mad cudacpp_src.mk cleanup: rename all xxx.o compiled via C++ as xxx_cpp.o --- epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk index f727c0b313..1555bc4640 100644 --- a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk @@ -126,7 +126,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,11 +139,11 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) ifneq ($(GPUCC),) gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) endif # Target (and build rules): common (src) library From fb1c1f5d070dcea5eec04ec5dfbe475029c224c6 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 11:28:55 +0200 Subject: [PATCH 11/48] [jtmk2] gg_tt.mad cudacpp_src.mk cleanup: reorder command blocks to always have _cpp first and _cuda second --- epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk index 1555bc4640..4eb0afa3db 100644 --- a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk @@ -140,21 +140,21 @@ endif #------------------------------------------------------------------------------- cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) -else +ifeq ($(GPUCC),) cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) +else + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- From 531e456d2664de747962cc354cdc3aaa81e88900 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 11:31:03 +0200 Subject: [PATCH 12/48] [jtmk2] gg_tt.mad cudacpp.mk cleanup: reorder command blocks to always have _cpp first and _cuda second --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 9e077d248d..403ef34f54 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -621,10 +621,10 @@ endif .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) -else +ifeq ($(GPUCC),) all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) +else +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and build options): debug From ef8a77938443f74bd8ca0cb24752d899b08ea374 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 
2024 11:33:00 +0200 Subject: [PATCH 13/48] [jtmk2] CODEGEN cudacpp_src.mk cleanup (backport gg_tt.mad) This includes in cudacpp_src.mk: - rename all xxx.o compiled via C++ as xxx_cpp.o - reorder command blocks to always have _cpp first and _cuda second --- .../iolibs/template_files/gpu/cudacpp_src.mk | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk index 0d27389194..6bfa89ee4a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk @@ -126,7 +126,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%%.o : %%.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%%_cpp.o : %%.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +139,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_%(model)s_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_%(model)s_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_%(model)s.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_%(model)s_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- From 1a3a5555848c979197bc573675847f404d907c01 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 11:44:36 +0200 Subject: [PATCH 14/48] [jtmk2] CODEGEN cudacpp.mk cleanup (backport gg_tt.mad) This includes in cudacpp.mk: - rename 'runTest.exe' executable as 'runTest_cpp.exe' or 'runTest_cuda.exe' - replace '_main' variables by '_checkmain' for both cxx and gpu - split 'testmain' variable into 'cxx_testmain' and 'gpu_testmain' - rename '[f]check.exe' as '[f]check_cpp.exe' and '[f]gcheck.exe' as '[f]check_cu> - fix runTest, runCheck, runFcheck, cmpFcheck targets after changing executable n> - rename fcheck_sa.o as fcheck_sa_fortran.o - rename all xxx.o compiled via C++ as xxx_cpp.o - reorder command blocks to always have _cpp first and _cuda second --- .../iolibs/template_files/gpu/cudacpp.mk | 258 +++++++++--------- 1 file changed, 133 insertions(+), 125 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index ed1f2a1bef..6babc548d2 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -606,27 
+606,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and build options): debug @@ -643,30 +641,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special 
build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math $(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers 
-$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +677,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) ###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! add it only for NVTX or curand #679) -$(BUILDDIR)/%%.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%%_cpp.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +716,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +726,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +754,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread 
$(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o +$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so 
$(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%%.o : %%.f *.inc +$(BUILDDIR)/%%_fortran.o : %%.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%%.o : %%.f *.inc +###$(BUILDDIR)/%%_fortran.o : %%.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +818,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer 
necessary since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1060,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) +# Target: check (execute runTest.exe, and compare check.exe with fcheck.exe) # [NB THIS IS WHAT IS USED IN THE GITHUB CI!] # [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else check: runTest cmpFcheck -endif -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) 
$(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- From 77cdc0906ea28a3ff9c923bfa2c45c3fd27a8acc Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 11:47:54 +0200 Subject: [PATCH 15/48] [jtmk2] regenerate gg_tt.mad, check all is ok so far --- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index cc77de1073..e60470ff90 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -2,6 +2,7 @@ This version is intended for development/beta testing and NOT for production. 
This version has not been fully tested (if at all) and might have limited user support (if at all) Running MG5 in debug mode +('WARNING: loading of madgraph too slow!!!', 1.0581340789794922) ************************************************************ * * * W E L C O M E to * @@ -51,8 +52,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +63,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005805015563964844  +DEBUG: model prefixing takes 0.0050716400146484375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +156,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +179,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -198,12 +199,12 @@ Wrote files for 10 helas calls in 0.107 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.154 s +ALOHA: aloha creates 2 routines in 0.126 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.140 s +ALOHA: aloha creates 4 routines in 0.123 s VVV1 FFV1 FFV1 @@ -239,10 +240,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.782s -user 0m1.573s -sys 0m0.205s -Code generation completed in 2 seconds +real 0m2.818s +user 0m2.272s +sys 0m0.292s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * @@ -267,8 +268,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -297,8 +298,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: From 9ecb791b09117ad3b431ff3bd9f623f39e7574f5 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 12:12:20 +0200 Subject: [PATCH 16/48] [jtmk2] in gg_tt.mad makefile, add suffix cpp or $(GPUSUFFIX) to lib$(MG5AMC_COMMONLIB)_.so --- epochX/cudacpp/gg_tt.mad/SubProcesses/makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else +CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif From 6cbe373c2dae1625a15f7042aa6c0a8411c8fe13 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 11:59:14 +0200 Subject: [PATCH 17/48] [jtmk2] in gg_tt.mad cudacpp.mk and cudacpp_src.mk: add suffix cpp or $(GPUSUFFIX) to lib$(MG5AMC_COMMONLIB)_.so --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 6 +++++- epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 403ef34f54..6633f051bf 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -60,7 
+60,11 @@ INCFLAGS = -I. OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here # Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) INCFLAGS += -I../../src diff --git a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk index 4eb0afa3db..d4cc8f4b78 100644 --- a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) From 2dfdd3e2b78db57e1e73a45df24fac0a1280df65 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 12:28:06 +0200 Subject: [PATCH 18/48] [jtmk2] in gg_tt.mad cudacpp.mk: move GPUCC definition as early as possible (so that MG5AMC_COMMONLIB may depend on it) --- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 126 +++++++++--------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 6633f051bf..3eedef0cae 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -54,69 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. 
-OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -ifeq ($(GPUCC),) -MG5AMC_COMMONLIB = mg5amc_common_cpp -else -MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) -endif -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq 
($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -344,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu 
git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq ($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) From 2f74cb96fe2c056d8090944b3a897193295f3d49 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 12:29:44 +0200 Subject: [PATCH 19/48] [jtmk2] in CODEGEN cudacpp.mk: move GPUCC definition as early as possible (so that MG5AMC_COMMONLIB may depend on it) --- .../iolibs/template_files/gpu/cudacpp.mk | 118 +++++++++--------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 6babc548d2..b30d60ab0c 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = 
$(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,65 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +MG5AMC_COMMONLIB = mg5amc_common +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within 
the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq ($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) From 1b9f167ff12e48d190ddd892a5f1b67be5d2d6a5 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 12:31:22 +0200 Subject: [PATCH 20/48] [jtmk2] in CODEGEN cudacpp.mk and cudacpp_src.mk: add suffix cpp or $(GPUSUFFIX) to lib$(MG5AMC_COMMONLIB)_.so --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 6 +++++- .../madgraph/iolibs/template_files/gpu/cudacpp_src.mk | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index b30d60ab0c..7652514671 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -287,7 +287,11 @@ INCFLAGS = -I. OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here # Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) INCFLAGS += -I../../src diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk index 6bfa89ee4a..7ca9eebb23 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! 
-MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) From f3419b2564c27acde38b51eb26066c5c23d17cb4 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 12:32:33 +0200 Subject: [PATCH 21/48] [jtmk2] in CODEGEN (recreate patch from gg_tt.mad) makefile, add suffix cpp or $(GPUSUFFIX) to lib$(MG5AMC_COMMONLIB)_.so ./CODEGEN/generateAndCompare.sh gg_tt --mad --nopatch git diff --no-ext-diff -R gg_tt.mad/Source/makefile gg_tt.mad/Source/dsample.f gg_tt.mad/Source/genps.inc gg_tt.mad/SubProcesses/makefile > CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common git diff --no-ext-diff -R gg_tt.mad/bin/internal/banner.py gg_tt.mad/bin/internal/gen_ximprove.py gg_tt.mad/bin/internal/madevent_interface.py >> CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common git diff --no-ext-diff -R gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f > CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 git checkout gg_tt.mad --- .../MG5aMC_patches/PROD/patch.common | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common index 7917d3446a..3cfcc909d9 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common @@ -38,7 +38,7 @@ index 617f10b93..00c73099a 100644 +cleanall: cleanSource # THIS IS THE ONE + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; diff --git 
b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile -index 348c283be..22517d0cc 100644 +index 348c283be..6a21300f7 100644 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile @@ -1,6 +1,37 @@ @@ -79,7 +79,7 @@ index 348c283be..22517d0cc 100644 # Load additional dependencies of the bias module, if present ifeq (,$(wildcard ../bias_dependencies)) BIASDEPENDENCIES = -@@ -24,7 +55,18 @@ else +@@ -24,7 +55,20 @@ else MADLOOP_LIB = endif @@ -87,19 +87,21 @@ index 348c283be..22517d0cc 100644 +LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias + +CUDACPP_MAKEFILE=cudacpp.mk -+CUDACPP_COMMONLIB=mg5amc_common +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +ifeq ($(BACKEND),cuda) ++CUDACPP_COMMONLIB=mg5amc_common_cuda +CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda +else ifeq ($(BACKEND),hip) ++CUDACPP_COMMONLIB=mg5amc_common_hip +CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip +else ++CUDACPP_COMMONLIB=mg5amc_common_cpp +CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp +endif LIBS = $(LIBDIR)libbias.$(libext) $(LIBDIR)libdhelas.$(libext) $(LIBDIR)libdsample.$(libext) $(LIBDIR)libgeneric.$(libext) $(LIBDIR)libpdf.$(libext) $(LIBDIR)libgammaUPC.$(libext) $(LIBDIR)libmodel.$(libext) $(LIBDIR)libcernlib.$(libext) $(MADLOOP_LIB) $(LOOP_LIBS) -@@ -43,41 +85,145 @@ ifeq ($(strip $(MATRIX_HEL)),) +@@ -43,41 +87,145 @@ ifeq ($(strip $(MATRIX_HEL)),) endif @@ -261,7 +263,7 @@ index 348c283be..22517d0cc 100644 # Dependencies -@@ -97,5 +243,80 @@ unwgt.o: genps.inc nexternal.inc symswap.inc cluster.inc run.inc message.inc \ +@@ -97,5 +245,80 @@ unwgt.o: genps.inc nexternal.inc symswap.inc cluster.inc run.inc message.inc \ run_config.inc initcluster.o: message.inc From 19d5277e1fd67a00c05297a254d15b71a9a13159 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 
2024 12:33:45 +0200 Subject: [PATCH 22/48] [jtmk2] regenerate gg_tt.mad, check all is ok so far --- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index e60470ff90..d64302d59d 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -2,7 +2,6 @@ This version is intended for development/beta testing and NOT for production. This version has not been fully tested (if at all) and might have limited user support (if at all) Running MG5 in debug mode -('WARNING: loading of madgraph too slow!!!', 1.0581340789794922) ************************************************************ * * * W E L C O M E to * @@ -63,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0050716400146484375  +DEBUG: model prefixing takes 0.004912614822387695  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -179,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,16 +194,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.107 s +Wrote files for 10 helas calls in 0.100 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.126 s +ALOHA: aloha creates 2 routines in 0.137 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.123 s +ALOHA: aloha creates 4 routines in 0.125 s VVV1 FFV1 FFV1 @@ -240,10 +239,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.818s -user 0m2.272s -sys 0m0.292s -Code generation completed in 3 seconds +real 0m1.702s +user 0m1.434s +sys 0m0.234s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * From 709a35585c1140466cfc77aedcc9cdb71e3aa71f Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 12:43:53 +0200 Subject: [PATCH 23/48] [jtmk2] in gg_tt.mad cudacpp.mk, finally rename 'make check' as 'make test' --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 3eedef0cae..f8d86fd493 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -1064,10 +1064,10 @@ endif #------------------------------------------------------------------------------- -# Target: check (execute runTest.exe, and compare check.exe with fcheck.exe) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -check: runTest cmpFcheck +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] 
+# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck # Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) From b5eba12693a42c925b23d33a5f0e88514bd78682 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 12:46:08 +0200 Subject: [PATCH 24/48] [jtmk2] in CODEGEN cudacpp.mk, finally rename 'make check' as 'make test' --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 7652514671..33de36d1ed 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -1064,10 +1064,10 @@ endif #------------------------------------------------------------------------------- -# Target: check (execute runTest.exe, and compare check.exe with fcheck.exe) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -check: runTest cmpFcheck +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] 
+# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck # Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) From 5b83a1d1566fb38b7a122ce82e8aaeecd1cd8e80 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 12:46:42 +0200 Subject: [PATCH 25/48] [jtmk2] regenerate gg_tt.mad, check all is ok so far --- epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index d64302d59d..76798bdaa2 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004912614822387695  +DEBUG: model prefixing takes 0.004954814910888672  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -198,12 +198,12 @@ Wrote files for 10 helas calls in 0.100 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.137 s +ALOHA: aloha creates 2 routines in 0.134 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.125 s +ALOHA: aloha creates 4 routines in 0.123 s VVV1 FFV1 FFV1 @@ -239,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.702s -user 0m1.434s -sys 0m0.234s +real 0m1.708s +user 0m1.426s +sys 0m0.233s Code generation completed in 2 seconds ************************************************************ * * From a96d88aec92c4f0110cb6234bf44a850e279c1e9 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 12:48:02 +0200 Subject: [PATCH 26/48] [jtmk2] in workflows/c-cpp.yml for the github CI, finally rename 'make check' as 'make test' --- .github/workflows/c-cpp.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index f0190b0df8..88df687bd6 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -33,8 +33,8 @@ jobs: run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info - name: make run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} - - name: make check - run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk check + - name: make test + run: make BACKEND=${{ 
matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk test CPU_MAC: runs-on: macos-latest env: @@ -53,8 +53,8 @@ jobs: run: make BACKEND=${{ matrix.backend }} OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info - name: make run: make BACKEND=${{ matrix.backend }} OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} - - name: make check - run: make BACKEND=${{ matrix.backend }} OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk check + - name: make test + run: make BACKEND=${{ matrix.backend }} OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk test GPU: runs-on: self-hosted env: @@ -76,5 +76,5 @@ jobs: run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info - name: make run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} - - name: make check - run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk check + - name: make test + run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk test From e0375b1ab3e2cadc76e4486c0f2b0b8e9ddebda0 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 12:56:19 +0200 Subject: [PATCH 27/48] [jtmk2] regenerate all processes --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 30 +- .../ee_mumu.mad/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../cudacpp/ee_mumu.mad/SubProcesses/makefile | 4 +- epochX/cudacpp/ee_mumu.mad/src/cudacpp_src.mk | 26 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 20 +- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 390 +++++++++--------- epochX/cudacpp/ee_mumu.sa/src/cudacpp_src.mk | 26 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 18 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 18 +- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 390 +++++++++--------- 
epochX/cudacpp/gg_tt.sa/src/cudacpp_src.mk | 26 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 36 +- .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../gg_tt01g.mad/SubProcesses/makefile | 4 +- .../cudacpp/gg_tt01g.mad/src/cudacpp_src.mk | 26 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 34 +- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../cudacpp/gg_ttg.mad/SubProcesses/makefile | 4 +- epochX/cudacpp/gg_ttg.mad/src/cudacpp_src.mk | 26 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 18 +- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 390 +++++++++--------- epochX/cudacpp/gg_ttg.sa/src/cudacpp_src.mk | 26 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 34 +- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../cudacpp/gg_ttgg.mad/SubProcesses/makefile | 4 +- epochX/cudacpp/gg_ttgg.mad/src/cudacpp_src.mk | 26 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 18 +- .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 390 +++++++++--------- epochX/cudacpp/gg_ttgg.sa/src/cudacpp_src.mk | 26 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 36 +- .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../gg_ttggg.mad/SubProcesses/makefile | 4 +- .../cudacpp/gg_ttggg.mad/src/cudacpp_src.mk | 26 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 20 +- .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 390 +++++++++--------- epochX/cudacpp/gg_ttggg.sa/src/cudacpp_src.mk | 26 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 34 +- .../gq_ttq.mad/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../cudacpp/gq_ttq.mad/SubProcesses/makefile | 4 +- epochX/cudacpp/gq_ttq.mad/src/cudacpp_src.mk | 26 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 20 +- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 390 +++++++++--------- epochX/cudacpp/gq_ttq.sa/src/cudacpp_src.mk | 26 +- .../CODEGEN_mad_heft_gg_bb_log.txt | 28 +- .../heft_gg_bb.mad/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../heft_gg_bb.mad/SubProcesses/makefile | 4 +- 
.../cudacpp/heft_gg_bb.mad/src/cudacpp_src.mk | 26 +- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 60 ++- .../heft_gg_bb.sa/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../cudacpp/heft_gg_bb.sa/src/cudacpp_src.mk | 26 +- .../CODEGEN_mad_pp_tt012j_log.txt | 72 ++-- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../pp_tt012j.mad/SubProcesses/makefile | 4 +- .../cudacpp/pp_tt012j.mad/src/cudacpp_src.mk | 26 +- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 34 +- .../smeft_gg_tttt.mad/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../smeft_gg_tttt.mad/SubProcesses/makefile | 4 +- .../smeft_gg_tttt.mad/src/cudacpp_src.mk | 26 +- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 56 ++- .../smeft_gg_tttt.sa/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../smeft_gg_tttt.sa/src/cudacpp_src.mk | 26 +- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 28 +- .../susy_gg_t1t1.mad/Source/MODEL/input.inc | 142 +++---- .../susy_gg_t1t1.mad/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../susy_gg_t1t1.mad/SubProcesses/makefile | 4 +- .../src/Parameters_MSSM_SLHA2.h | 5 +- .../susy_gg_t1t1.mad/src/cudacpp_src.mk | 26 +- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 16 +- .../susy_gg_t1t1.sa/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../src/Parameters_MSSM_SLHA2.h | 5 +- .../susy_gg_t1t1.sa/src/cudacpp_src.mk | 26 +- .../CODEGEN_mad_susy_gg_tt_log.txt | 28 +- .../susy_gg_tt.mad/Source/MODEL/input.inc | 142 +++---- .../susy_gg_tt.mad/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../susy_gg_tt.mad/SubProcesses/makefile | 4 +- .../src/Parameters_MSSM_SLHA2.h | 5 +- .../cudacpp/susy_gg_tt.mad/src/cudacpp_src.mk | 26 +- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 19 +- .../susy_gg_tt.sa/SubProcesses/cudacpp.mk | 390 +++++++++--------- .../susy_gg_tt.sa/src/Parameters_MSSM_SLHA2.h | 5 +- .../cudacpp/susy_gg_tt.sa/src/cudacpp_src.mk | 26 +- 81 files changed, 5104 insertions(+), 4657 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt 
b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index e061610590..9a5327e985 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058252811431884766  +DEBUG: model prefixing takes 0.005074739456176758  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.005 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,19 +194,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.105 s +Wrote files for 8 helas calls in 0.099 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.212 s +ALOHA: aloha creates 3 routines in 0.190 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.269 s +ALOHA: aloha creates 7 routines in 0.240 s FFV1 FFV1 FFV2 @@ -250,9 +250,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.972s -user 0m1.747s -sys 0m0.214s +real 0m1.851s +user 0m1.596s +sys 0m0.235s Code generation completed in 2 seconds ************************************************************ * * @@ -278,8 +278,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -308,8 +308,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a 
local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else 
+CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif diff --git a/epochX/cudacpp/ee_mumu.mad/src/cudacpp_src.mk b/epochX/cudacpp/ee_mumu.mad/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/ee_mumu.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 9d92f3f8d9..c84ee401ac 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005830049514770508  +DEBUG: model prefixing takes 0.00506591796875  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.005 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Load PLUGIN.CUDACPP_OUTPUT @@ -177,14 +177,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.283 s +ALOHA: aloha creates 4 routines in 0.251 s FFV1 FFV1 FFV2 @@ -203,7 +203,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
quit -real 0m0.729s -user 0m0.624s -sys 0m0.060s -Code generation completed in 1 seconds +real 0m0.656s +user 0m0.570s +sys 0m0.059s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest 
installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR!
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu.sa/src/cudacpp_src.mk b/epochX/cudacpp/ee_mumu.sa/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/ee_mumu.sa/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! 
-MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 76798bdaa2..d7e99ccf73 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004954814910888672  +DEBUG: model prefixing takes 0.005176544189453125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,16 +194,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.100 s +Wrote files for 10 helas calls in 0.098 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.134 s +ALOHA: aloha creates 2 routines in 0.135 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.123 s +ALOHA: aloha creates 4 routines in 0.121 s VVV1 FFV1 FFV1 @@ -239,10 +239,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.708s -user 0m1.426s -sys 0m0.233s -Code generation completed in 2 seconds +real 0m1.651s +user 0m1.428s +sys 0m0.220s +Code generation completed in 1 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index fc60349507..eb517b045c 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. 
Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005868434906005859  +DEBUG: model prefixing takes 0.005257606506347656  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -183,7 +183,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.154 s +ALOHA: aloha creates 2 routines in 0.146 s VVV1 FFV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
quit -real 0m0.562s -user 0m0.507s -sys 0m0.052s -Code generation completed in 0 seconds +real 0m0.544s +user 0m0.492s +sys 0m0.047s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation 
-###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi
+ifeq ($(GPUCC),)
+	@echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2"
+	@echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2"
+	@me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi
+else
+	@echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2"
+	@echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2"
+	@me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.sa/src/cudacpp_src.mk b/epochX/cudacpp/gg_tt.sa/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/gg_tt.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_tt.sa/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! 
-MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index b9564eac61..85093dfd4e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005810976028442383  +DEBUG: model prefixing takes 0.004948139190673828  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams add process g g > t t~ g INFO: Checking for minimal orders which gives processes. @@ -163,7 +163,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.019 s Total: 2 processes with 19 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -188,7 +188,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -205,7 +205,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -220,15 +220,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.046 s -Wrote files for 46 helas calls in 0.258 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.041 s +Wrote files for 46 helas calls in 0.239 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.348 s +ALOHA: aloha creates 5 routines in 0.304 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -236,7 +236,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.331 s +ALOHA: aloha creates 10 routines in 0.286 s VVV1 VVV1 FFV1 @@ -285,9 +285,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.416s -user 0m2.182s -sys 0m0.234s +real 0m2.217s +user 0m1.962s +sys 0m0.249s Code generation completed in 2 seconds ************************************************************ * * @@ -313,8 +313,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -343,8 +343,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using 
a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else 
+CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif diff --git a/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index fddf911cda..d44a6475ba 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058176517486572266  +DEBUG: model prefixing takes 0.00538945198059082  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.023 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s -Wrote files for 36 helas calls in 0.159 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.036 s +Wrote files for 36 helas calls in 0.145 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.349 s +ALOHA: aloha creates 5 routines in 0.306 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.332 s +ALOHA: aloha creates 10 routines in 0.286 s VVV1 VVV1 FFV1 @@ -254,10 +254,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.337s -user 0m2.058s -sys 0m0.239s -Code generation completed in 3 seconds +real 0m2.113s +user 0m1.886s +sys 0m0.223s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * @@ -282,8 +282,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -312,8 +312,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local 
gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else 
+CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif diff --git a/epochX/cudacpp/gg_ttg.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttg.mad/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttg.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 7d0df1bb94..87eef8bde0 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005813121795654297  +DEBUG: model prefixing takes 0.00493621826171875  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.023 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.036 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.347 s +ALOHA: aloha creates 5 routines in 0.303 s VVV1 VVV1 FFV1 @@ -206,7 +206,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. quit -real 0m0.824s -user 0m0.761s -sys 0m0.060s +real 0m0.746s +user 0m0.690s +sys 0m0.048s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# 
example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.sa/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttg.sa/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttg.sa/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! 
-MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 70ce80900e..d88bbb63fe 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005890607833862305  +DEBUG: model prefixing takes 0.005059242248535156  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.168 s +1 processes with 123 diagrams generated in 0.154 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.453 s -Wrote files for 222 helas calls in 0.738 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.409 s +Wrote files for 222 helas calls in 0.662 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.356 s +ALOHA: aloha creates 5 routines in 0.310 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines 
ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.338 s +ALOHA: aloha creates 10 routines in 0.305 s VVV1 VVV1 FFV1 @@ -257,10 +257,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.464s -user 0m3.217s -sys 0m0.241s -Code generation completed in 4 seconds +real 0m3.178s +user 0m2.929s +sys 0m0.239s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * @@ -285,8 +285,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -315,8 +315,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = 
/cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else 
+CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif diff --git a/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 210673a0f2..bfb761500c 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005852460861206055  +DEBUG: model prefixing takes 0.0052258968353271484  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.168 s +1 processes with 123 diagrams generated in 0.156 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.452 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.410 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.346 s +ALOHA: aloha creates 5 routines in 0.300 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.531s -user 0m1.465s -sys 0m0.053s +real 0m1.393s +user 0m1.323s +sys 0m0.058s Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this 
out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernels_*.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_*.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_*.o and (Cu|Hip)randRandomNumberKernel_*.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS)
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! 
-MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index a28e2c3783..a2d6829e85 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005841732025146484  +DEBUG: model prefixing takes 0.005289793014526367  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 2.008 s +1 processes with 1240 diagrams generated in 1.846 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,9 +178,9 @@ INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction +INFO: Color-Flow passed to 1630 term in 7s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,15 +195,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 7.025 s -Wrote files for 2281 helas calls in 19.714 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.350 s +Wrote files for 2281 helas calls in 17.442 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.340 s +ALOHA: aloha creates 5 routines in 0.299 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -211,7 +211,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.333 s +ALOHA: aloha creates 10 routines in 0.293 s VVV1 VVV1 FFV1 @@ -259,10 +259,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m31.105s -user 0m30.577s -sys 0m0.429s -Code generation completed in 31 seconds +real 0m27.829s +user 0m27.337s +sys 0m0.391s +Code generation completed in 27 seconds ************************************************************ * * * W E L C O M E to * @@ -287,8 +287,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -317,8 +317,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using 
a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else 
+CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif diff --git a/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index d1d3da38ee..ce14eef7e6 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058422088623046875  +DEBUG: model prefixing takes 0.00564885139465332  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 2.006 s +1 processes with 1240 diagrams generated in 1.834 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 7.034 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.228 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.373 s +ALOHA: aloha creates 5 routines in 0.323 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m13.957s -user 0m13.797s -sys 0m0.108s -Code generation completed in 14 seconds +real 0m12.339s +user 0m12.185s +sys 0m0.102s +Code generation completed in 13 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest 
installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi
+ifeq ($(GPUCC),)
+	@echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2"
+	@echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2"
+	@me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi
+else
+	@echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2"
+	@echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2"
+	@me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! 
-MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index ac6d206671..5a014a7cb3 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005831003189086914  +DEBUG: model prefixing takes 0.0051610469818115234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.083 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -201,7 +201,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -218,7 +218,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -233,17 +233,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.033 s -Wrote files for 32 helas calls in 0.233 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Wrote files for 32 helas calls in 0.218 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.155 s +ALOHA: aloha creates 2 routines in 0.139 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.139 s +ALOHA: aloha creates 4 routines in 0.124 s FFV1 FFV1 FFV1 @@ -296,9 +296,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.021s -user 0m1.798s -sys 0m0.224s +real 0m1.900s +user 0m1.650s +sys 0m0.246s Code generation completed in 2 seconds ************************************************************ * * @@ -324,8 +324,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. 
Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -354,8 +354,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. 
-OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = 
$(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an 
external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq ($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is 
implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += 
$(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... 
###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread 
$(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o +$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. -I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used 
#802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link 
fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: $(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += 
$(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to 
`__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell 
dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and 
runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) 
$(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 
#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else +CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif diff --git a/epochX/cudacpp/gq_ttq.mad/src/cudacpp_src.mk b/epochX/cudacpp/gq_ttq.mad/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gq_ttq.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 0af1cee08d..c1507b6837 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058209896087646484  +DEBUG: model prefixing takes 0.004972219467163086  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.083 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -210,12 +210,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.154 s +ALOHA: aloha creates 2 routines in 0.136 s FFV1 FFV1 FFV1 @@ -231,7 +231,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m0.686s -user 0m0.626s -sys 0m0.056s -Code generation completed in 0 seconds +real 0m0.639s +user 0m0.570s +sys 0m0.059s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. 
-OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = 
$(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an 
external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq ($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is 
implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += 
$(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... 
###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread 
$(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o +$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. -I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used 
#802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link 
fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: $(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += 
$(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to 
`__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell 
dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and 
runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) 
$(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 
#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.sa/src/cudacpp_src.mk b/epochX/cudacpp/gq_ttq.sa/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/gq_ttq.sa/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 661f542f0e..e752892fe5 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -127,7 +127,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.006 s +1 processes with 4 diagrams generated in 0.005 s Total: 1 processes with 4 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -150,7 +150,7 @@ INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Creating files in directory P1_gg_bbx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -166,20 +166,20 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Finding symmetric diagrams for subprocess group gg_bbx Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s -Wrote files for 12 helas calls in 0.110 s +Wrote files for 12 helas calls in 0.104 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.280 s +ALOHA: aloha creates 4 routines in 0.249 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.263 s +ALOHA: aloha creates 8 routines in 0.233 s VVS3 VVV1 FFV1 @@ -217,9 +217,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.031s -user 0m1.827s -sys 0m0.208s +real 0m1.898s +user 0m1.654s +sys 0m0.240s Code generation completed in 2 seconds ************************************************************ * * @@ -245,8 +245,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. 
Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -275,8 +275,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. 
-OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = 
$(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an 
external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq ($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is 
implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_<cpp|$(GPUSUFFIX)>.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += 
$(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_<cpp|$(GPUSUFFIX)>.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_<cpp|$(GPUSUFFIX)>.o and (Cu|Hip)randRandomNumberKernel_<cpp|$(GPUSUFFIX)>.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... 
###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread 
$(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o +$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. -I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used 
#802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link 
fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: $(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += 
$(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to 
`__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell 
dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and 
runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) 
$(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 
#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/makefile b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/makefile +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else +CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif diff --git a/epochX/cudacpp/heft_gg_bb.mad/src/cudacpp_src.mk b/epochX/cudacpp/heft_gg_bb.mad/src/cudacpp_src.mk index b8581be64f..7811ab46bb 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/heft_gg_bb.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! 
-MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_heft_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_heft_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_heft.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_heft_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 0ff1f087b1..af64a247ee 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,6 +62,54 @@ set auto_convert_model T save options auto_convert_model save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft +INFO: download model from https://madgraph.mi.infn.it/Downloads/models/heft.tgz to the following directory: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/models  +--2024-05-15 12:50:52-- https://madgraph.mi.infn.it/Downloads/models/heft.tgz +Resolving madgraph.mi.infn.it (madgraph.mi.infn.it)... 192.135.21.75 +Connecting to madgraph.mi.infn.it (madgraph.mi.infn.it)|192.135.21.75|:443... connected. +HTTP request sent, awaiting response... 200 OK +Length: 50876 (50K) [application/x-gzip] +Saving to: ‘tmp.tgz’ + + 0K .......... .......... 
.......... .......... ......... 100% 2.79M=0.02s + +2024-05-15 12:50:52 (2.79 MB/s) - ‘tmp.tgz’ saved [50876/50876] + +heft/ +heft/write_param_card.py +heft/restrict_ckm.dat +heft/couplings.py +heft/HEFT_UFO.log +heft/lorentz.py +heft/__init__.py +heft/__pycache__/ +heft/particles.py +heft/object_library.py +heft/restrict_default.dat +heft/restrict_zeromass_ckm.dat +heft/restrict_no_b_mass.dat +heft/function_library.py +heft/parameters.py +heft/py3_model.pkl +heft/coupling_orders.py +heft/restrict_no_tau_mass.dat +heft/vertices.py +heft/restrict_no_masses.dat +heft/__pycache__/write_param_card.cpython-311.pyc +heft/__pycache__/parameters.cpython-311.pyc +heft/__pycache__/function_library.cpython-311.pyc +heft/__pycache__/coupling_orders.cpython-311.pyc +heft/__pycache__/object_library.cpython-311.pyc +heft/__pycache__/couplings.cpython-311.pyc +heft/__pycache__/particles.cpython-311.pyc +heft/__pycache__/vertices.cpython-311.pyc +heft/__pycache__/lorentz.cpython-311.pyc +heft/__pycache__/__init__.cpython-311.pyc +INFO: reload from .py file +INFO: load particles +INFO: load vertices +WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  +WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  +DEBUG: model prefixing takes 0.005690574645996094  INFO: Restrict model heft with file models/heft/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -157,7 +205,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.283 s +ALOHA: aloha creates 4 routines in 0.249 s VVS3 VVV1 FFV1 @@ -174,7 +222,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m0.681s -user 0m0.624s -sys 0m0.053s +real 0m1.060s +user 0m0.751s +sys 0m0.075s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. 
-OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = 
$(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an 
external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq ($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is 
implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += 
$(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... 
###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread 
$(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o +$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. -I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used 
#802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link 
fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: $(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += 
$(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to 
`__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell 
dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and 
runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) 
$(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2
#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/heft_gg_bb.sa/src/cudacpp_src.mk b/epochX/cudacpp/heft_gg_bb.sa/src/cudacpp_src.mk index b8581be64f..7811ab46bb 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/heft_gg_bb.sa/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_heft_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_heft_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_heft.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_heft_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index e974458719..aa3a1e93aa 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058176517486572266  +DEBUG: model prefixing takes 0.005166053771972656  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.031 s +5 processes with 7 diagrams generated in 0.029 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.145 s +13 processes with 76 diagrams generated in 0.134 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.960 s +65 processes with 1119 diagrams generated in 1.789 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -500,7 +500,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -517,7 +517,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -534,7 +534,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -551,7 +551,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -568,7 +568,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -585,7 +585,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -602,7 +602,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -619,7 +619,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -636,7 +636,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -653,7 +653,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -670,7 +670,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -687,7 +687,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -704,7 +704,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -721,7 +721,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -738,7 +738,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -755,7 +755,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -772,7 +772,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -789,7 +789,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -804,15 +804,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.378 s -Wrote files for 810 helas calls in 3.470 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.226 s +Wrote files for 810 helas calls in 3.151 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.362 s +ALOHA: aloha creates 5 routines in 0.313 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -820,7 +820,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.336 s +ALOHA: aloha creates 10 routines in 0.289 s VVV1 VVV1 FFV1 @@ -1030,10 +1030,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.362s -user 0m8.883s -sys 0m0.445s -Code generation completed in 9 seconds +real 0m8.549s +user 0m8.032s +sys 0m0.448s +Code generation completed in 8 seconds ************************************************************ * * * W E L C O M E to * @@ -1058,8 +1058,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -1088,8 +1088,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests 
using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else 
+CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif diff --git a/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_src.mk b/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_src.mk index f727c0b313..d4cc8f4b78 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_sm.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 15ffc09ea7..301164c091 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.1477065086364746  +DEBUG: model prefixing takes 0.122955322265625  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.958 s +1 processes with 72 diagrams generated in 3.543 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +115,7 @@ INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 INFO: Creating files in directory P1_gg_ttxttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -130,15 +130,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx -Generated helas calls for 1 subprocesses (72 diagrams) in 0.200 s -Wrote files for 119 helas calls in 0.443 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.179 s +Wrote files for 119 helas calls in 0.395 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.336 s +ALOHA: aloha creates 5 routines in 0.299 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -146,7 +146,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.351 s +ALOHA: aloha creates 10 routines in 0.317 s VVV5 VVV5 FFV1 @@ -191,10 +191,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.238s -user 0m6.991s -sys 0m0.229s -Code generation completed in 8 seconds +real 0m6.544s +user 0m6.256s +sys 0m0.262s +Code generation completed in 7 seconds ************************************************************ * * * W E L C O M E to * @@ -219,8 +219,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -249,8 +249,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) 
to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/makefile b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/makefile +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else 
+CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/src/cudacpp_src.mk b/epochX/cudacpp/smeft_gg_tttt.mad/src/cudacpp_src.mk index 8783dc095a..f1958edb7c 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/smeft_gg_tttt.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_SMEFTsim_topU3l_MwScheme_UFO_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_SMEFTsim_topU3l_MwScheme_UFO_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_SMEFTsim_topU3l_MwScheme_UFO.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_SMEFTsim_topU3l_MwScheme_UFO_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 7115deec1c..298e9be9d9 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,6 +62,41 @@ set auto_convert_model T save options auto_convert_model save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t +INFO: download model from http://feynrules.irmp.ucl.ac.be/raw-attachment/wiki/SMEFT/SMEFTsim_topU3l_MwScheme_UFO.tar.gz to the following directory: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/models  +--2024-05-15 12:51:31-- http://feynrules.irmp.ucl.ac.be/raw-attachment/wiki/SMEFT/SMEFTsim_topU3l_MwScheme_UFO.tar.gz +Resolving feynrules.irmp.ucl.ac.be (feynrules.irmp.ucl.ac.be)... 
130.104.48.109 +Connecting to feynrules.irmp.ucl.ac.be (feynrules.irmp.ucl.ac.be)|130.104.48.109|:80... connected. +HTTP request sent, awaiting response... 200 Ok +Length: 80562 (79K) [application/x-tar] +Saving to: ‘tmp.tgz’ + + 0K .......... .......... .......... .......... .......... 63% 914K 0s + 50K .......... .......... ........ 100% 325M=0.05s + +2024-05-15 12:51:31 (1.40 MB/s) - ‘tmp.tgz’ saved [80562/80562] + +SMEFTsim_topU3l_MwScheme_UFO/ +SMEFTsim_topU3l_MwScheme_UFO/__init__.py +SMEFTsim_topU3l_MwScheme_UFO/param_card_massless.dat +SMEFTsim_topU3l_MwScheme_UFO/CT_couplings.py +SMEFTsim_topU3l_MwScheme_UFO/particles.py +SMEFTsim_topU3l_MwScheme_UFO/write_param_card.py +SMEFTsim_topU3l_MwScheme_UFO/decays.py +SMEFTsim_topU3l_MwScheme_UFO/parameters.py +SMEFTsim_topU3l_MwScheme_UFO/restrict_massless.dat +SMEFTsim_topU3l_MwScheme_UFO/object_library.py +SMEFTsim_topU3l_MwScheme_UFO/coupling_orders.py +SMEFTsim_topU3l_MwScheme_UFO/version.info +SMEFTsim_topU3l_MwScheme_UFO/function_library.py +SMEFTsim_topU3l_MwScheme_UFO/couplings.py +SMEFTsim_topU3l_MwScheme_UFO/propagators.py +SMEFTsim_topU3l_MwScheme_UFO/lorentz.py +SMEFTsim_topU3l_MwScheme_UFO/vertices.py +SMEFTsim_topU3l_MwScheme_UFO/restrict_SMlimit_massless.dat +fail to load model but auto_convert_model is on True. Trying to convert the model +convert model /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/models/SMEFTsim_topU3l_MwScheme_UFO +retry the load of the model +import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t INFO: load particles INFO: load vertices CRITICAL: Model with non QCD emission of gluon (found 14 of those). 
@@ -77,7 +112,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.1475965976715088  +DEBUG: model prefixing takes 0.11452722549438477  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -86,13 +121,16 @@ Defined multiparticle l- = e- mu- Defined multiparticle vl = ve vm vt Defined multiparticle vl~ = ve~ vm~ vt~ Defined multiparticle all = g a ve vm vt ve~ vm~ vt~ u c t d s b t1 u~ c~ t~ d~ s~ b~ t1~ z w+ z1 w1+ h h1 w- w1- e- mu- ta- e+ mu+ ta+ +INFO: Change particles name to pass to MG5 convention +Kept definitions of multiparticles p / j / l+ / l- / vl / vl~ unchanged +Defined multiparticle all = g a ve vm vt ve~ vm~ vt~ u c t d s b t1 u~ c~ t~ d~ s~ b~ t1~ z w+ z1 w1+ h h1 w- w1- e- mu- ta- e+ mu+ ta+ generate g g > t t~ t t~ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.945 s +1 processes with 72 diagrams generated in 3.569 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +153,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.198 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.183 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -123,7 +161,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.334 s +ALOHA: aloha creates 5 routines in 0.301 s VVV5 VVV5 FFV1 @@ -143,7 +181,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m5.419s -user 0m5.327s -sys 0m0.067s +real 0m5.839s +user 0m4.944s +sys 0m0.101s Code generation completed in 6 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. 
-OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = 
$(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an 
external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq ($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is 
implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += 
$(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... 
###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread 
$(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o +$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. -I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used 
#802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link 
fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: $(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += 
$(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to 
`__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell 
dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and 
runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) 
$(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi
+else
+	@echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2"
+	@echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2"
+	@me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi
+endif
 
 # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck)
 cuda-memcheck: all.$(TAG)
-	$(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2
+	$(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2
#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/cudacpp_src.mk b/epochX/cudacpp/smeft_gg_tttt.sa/src/cudacpp_src.mk index 8783dc095a..f1958edb7c 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_SMEFTsim_topU3l_MwScheme_UFO_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_SMEFTsim_topU3l_MwScheme_UFO_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_SMEFTsim_topU3l_MwScheme_UFO.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_SMEFTsim_topU3l_MwScheme_UFO_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 6060e5336f..7675af32e5 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.131 s +1 processes with 6 diagrams generated in 0.119 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -592,19 +592,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x -Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s -Wrote files for 16 helas calls in 0.118 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s +Wrote files for 16 helas calls in 0.108 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.197 s +ALOHA: aloha creates 3 routines in 0.172 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 
routines in 0.193 s +ALOHA: aloha creates 6 routines in 0.167 s VVV1 VSS1 VSS1 @@ -645,8 +645,8 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.946s -user 0m2.668s +real 0m2.660s +user 0m2.390s sys 0m0.267s Code generation completed in 3 seconds ************************************************************ @@ -673,8 +673,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -703,8 +703,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/input.inc b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/input.inc index 286a92e533..59b3e6e7f2 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/input.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/input.inc @@ -2,29 +2,30 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - DOUBLE PRECISION MDL_SQRT__AS,MDL_MSD5,MDL_MSD2,MDL_MSU5 - $ ,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2,MDL_RMQ22X2 - $ ,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2,MDL_CONJG__RN3X3 - $ ,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1,MDL_RN3X3,MDL_RN1X1 - $ ,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1,MDL_I98X11,MDL_I97X11 - $ ,MDL_I96X11,MDL_I93X11,MDL_I92X11,MDL_I87X11,MDL_I82X11 - $ ,MDL_I74X11,MDL_I6X44,MDL_I5X11,MDL_I53X11,MDL_I52X44 - $ ,MDL_I51X11,MDL_I39X11,MDL_I31X11,MDL_I26X44,MDL_I25X11 - $ ,MDL_I12X11,MDL_I102X44,MDL_I101X44,MDL_I100X44,MDL_CKM3X3 - $ ,MDL_ATAN__TB,MDL_BETA,MDL_CW,MDL_MZ__EXP__2,MDL_CW__EXP__2 - $ ,MDL_SW,MDL_COS__BETA,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2 - $ ,MDL_COS__ALP,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD - $ ,MDL_VU,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3 - $ ,MDL_RRD3X6,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP - $ ,MDL_RMUH,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1 - $ ,MDL_RME23X3,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3 - $ ,MDL_MHD2,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1 - $ ,MDL_RMU23X3,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4 - $ ,MDL_RNN2X1,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1 - $ ,MDL_RNN3X2,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2 - $ ,MDL_RNN4X3,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3 - $ ,MDL_RRL3X6,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1 - $ 
,AS,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 + DOUBLE PRECISION MDL_SQRT__AS,MDL_G__EXP__2,MDL_MSD5,MDL_MSD2 + $ ,MDL_MSU5,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2 + $ ,MDL_RMQ22X2,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2 + $ ,MDL_CONJG__RN3X3,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1 + $ ,MDL_RN3X3,MDL_RN1X1,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1 + $ ,MDL_I98X11,MDL_I97X11,MDL_I96X11,MDL_I93X11,MDL_I92X11 + $ ,MDL_I87X11,MDL_I82X11,MDL_I74X11,MDL_I6X44,MDL_I5X11 + $ ,MDL_I53X11,MDL_I52X44,MDL_I51X11,MDL_I39X11,MDL_I31X11 + $ ,MDL_I26X44,MDL_I25X11,MDL_I12X11,MDL_I102X44,MDL_I101X44 + $ ,MDL_I100X44,MDL_CKM3X3,MDL_ATAN__TB,MDL_BETA,MDL_CW + $ ,MDL_MZ__EXP__2,MDL_CW__EXP__2,MDL_SW,MDL_COS__BETA + $ ,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2,MDL_COS__ALP + $ ,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD,MDL_VU + $ ,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3,MDL_RRD3X6 + $ ,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP,MDL_RMUH + $ ,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1,MDL_RME23X3 + $ ,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3,MDL_MHD2 + $ ,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1,MDL_RMU23X3 + $ ,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4,MDL_RNN2X1 + $ ,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1,MDL_RNN3X2 + $ ,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2,MDL_RNN4X3 + $ ,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3,MDL_RRL3X6 + $ ,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1,AS + $ ,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 $ ,MDL_RTU3X3,MDL_RUU1X1,MDL_RUU1X2,MDL_RUU2X1,MDL_RUU2X2 $ ,MDL_RMNS1X1,MDL_RMNS2X2,MDL_RMNS3X3,MDL_RRU1X1,MDL_RRU2X2 $ ,MDL_RRU3X3,MDL_RRU3X6,MDL_RRU4X4,MDL_RRU5X5,MDL_RRU6X3 @@ -32,29 +33,30 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ ,MDL_RVV1X2,MDL_RVV2X1,MDL_RVV2X2,MDL_RYD3X3,MDL_RYE3X3 $ ,MDL_RYU3X3 - COMMON/PARAMS_R/ MDL_SQRT__AS,MDL_MSD5,MDL_MSD2,MDL_MSU5 - $ ,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2,MDL_RMQ22X2 - $ 
,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2,MDL_CONJG__RN3X3 - $ ,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1,MDL_RN3X3,MDL_RN1X1 - $ ,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1,MDL_I98X11,MDL_I97X11 - $ ,MDL_I96X11,MDL_I93X11,MDL_I92X11,MDL_I87X11,MDL_I82X11 - $ ,MDL_I74X11,MDL_I6X44,MDL_I5X11,MDL_I53X11,MDL_I52X44 - $ ,MDL_I51X11,MDL_I39X11,MDL_I31X11,MDL_I26X44,MDL_I25X11 - $ ,MDL_I12X11,MDL_I102X44,MDL_I101X44,MDL_I100X44,MDL_CKM3X3 - $ ,MDL_ATAN__TB,MDL_BETA,MDL_CW,MDL_MZ__EXP__2,MDL_CW__EXP__2 - $ ,MDL_SW,MDL_COS__BETA,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2 - $ ,MDL_COS__ALP,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD - $ ,MDL_VU,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3 - $ ,MDL_RRD3X6,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP - $ ,MDL_RMUH,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1 - $ ,MDL_RME23X3,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3 - $ ,MDL_MHD2,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1 - $ ,MDL_RMU23X3,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4 - $ ,MDL_RNN2X1,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1 - $ ,MDL_RNN3X2,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2 - $ ,MDL_RNN4X3,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3 - $ ,MDL_RRL3X6,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1 - $ ,AS,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 + COMMON/PARAMS_R/ MDL_SQRT__AS,MDL_G__EXP__2,MDL_MSD5,MDL_MSD2 + $ ,MDL_MSU5,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2 + $ ,MDL_RMQ22X2,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2 + $ ,MDL_CONJG__RN3X3,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1 + $ ,MDL_RN3X3,MDL_RN1X1,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1 + $ ,MDL_I98X11,MDL_I97X11,MDL_I96X11,MDL_I93X11,MDL_I92X11 + $ ,MDL_I87X11,MDL_I82X11,MDL_I74X11,MDL_I6X44,MDL_I5X11 + $ ,MDL_I53X11,MDL_I52X44,MDL_I51X11,MDL_I39X11,MDL_I31X11 + $ ,MDL_I26X44,MDL_I25X11,MDL_I12X11,MDL_I102X44,MDL_I101X44 + $ ,MDL_I100X44,MDL_CKM3X3,MDL_ATAN__TB,MDL_BETA,MDL_CW + $ ,MDL_MZ__EXP__2,MDL_CW__EXP__2,MDL_SW,MDL_COS__BETA + $ 
,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2,MDL_COS__ALP + $ ,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD,MDL_VU + $ ,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3,MDL_RRD3X6 + $ ,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP,MDL_RMUH + $ ,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1,MDL_RME23X3 + $ ,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3,MDL_MHD2 + $ ,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1,MDL_RMU23X3 + $ ,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4,MDL_RNN2X1 + $ ,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1,MDL_RNN3X2 + $ ,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2,MDL_RNN4X3 + $ ,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3,MDL_RRL3X6 + $ ,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1,AS + $ ,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 $ ,MDL_RTU3X3,MDL_RUU1X1,MDL_RUU1X2,MDL_RUU2X1,MDL_RUU2X2 $ ,MDL_RMNS1X1,MDL_RMNS2X2,MDL_RMNS3X3,MDL_RRU1X1,MDL_RRU2X2 $ ,MDL_RRU3X3,MDL_RRU3X6,MDL_RRU4X4,MDL_RRU5X5,MDL_RRU6X3 @@ -63,18 +65,18 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ ,MDL_RYU3X3 - DOUBLE COMPLEX MDL_G__EXP__2,MDL_MD21X1,MDL_MD22X2,MDL_MD23X3 - $ ,MDL_ME21X1,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2 - $ ,MDL_ML23X3,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1 - $ ,MDL_MU22X2,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3 - $ ,MDL_NN1X1,MDL_NN1X2,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2 - $ ,MDL_NN2X3,MDL_NN2X4,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4 - $ ,MDL_NN4X1,MDL_NN4X2,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6 - $ ,MDL_RD6X3,MDL_RD6X6,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6 - $ ,MDL_RU3X3,MDL_RU3X6,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2 - $ ,MDL_UU2X1,MDL_UU2X2,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2 - $ ,MDL_TD3X3,MDL_TE3X3,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3 - $ ,MDL_BB,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 + DOUBLE COMPLEX MDL_MD21X1,MDL_MD22X2,MDL_MD23X3,MDL_ME21X1 + $ ,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2,MDL_ML23X3 + $ 
,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1,MDL_MU22X2 + $ ,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3,MDL_NN1X1,MDL_NN1X2 + $ ,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2,MDL_NN2X3,MDL_NN2X4 + $ ,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4,MDL_NN4X1,MDL_NN4X2 + $ ,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6,MDL_RD6X3,MDL_RD6X6 + $ ,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6,MDL_RU3X3,MDL_RU3X6 + $ ,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2,MDL_UU2X1,MDL_UU2X2 + $ ,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2,MDL_TD3X3,MDL_TE3X3 + $ ,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3,MDL_BB + $ ,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 $ ,MDL_I10X36,MDL_CONJG__RD3X6,MDL_I100X33,MDL_I100X36 $ ,MDL_CONJG__RD6X6,MDL_I100X63,MDL_I100X66,MDL_CONJG__RL3X6 $ ,MDL_I101X33,MDL_I101X36,MDL_CONJG__RL6X6,MDL_I101X63 @@ -148,18 +150,18 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ ,MDL_CONJG__UU2X2,MDL_CONJG__VV1X1,MDL_CONJG__VV1X2 $ ,MDL_CONJG__VV2X1,MDL_CONJG__VV2X2,MDL_CONJG__MUH - COMMON/PARAMS_C/ MDL_G__EXP__2,MDL_MD21X1,MDL_MD22X2,MDL_MD23X3 - $ ,MDL_ME21X1,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2 - $ ,MDL_ML23X3,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1 - $ ,MDL_MU22X2,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3 - $ ,MDL_NN1X1,MDL_NN1X2,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2 - $ ,MDL_NN2X3,MDL_NN2X4,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4 - $ ,MDL_NN4X1,MDL_NN4X2,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6 - $ ,MDL_RD6X3,MDL_RD6X6,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6 - $ ,MDL_RU3X3,MDL_RU3X6,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2 - $ ,MDL_UU2X1,MDL_UU2X2,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2 - $ ,MDL_TD3X3,MDL_TE3X3,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3 - $ ,MDL_BB,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 + COMMON/PARAMS_C/ MDL_MD21X1,MDL_MD22X2,MDL_MD23X3,MDL_ME21X1 + $ ,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2,MDL_ML23X3 + $ ,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1,MDL_MU22X2 + $ 
,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3,MDL_NN1X1,MDL_NN1X2 + $ ,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2,MDL_NN2X3,MDL_NN2X4 + $ ,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4,MDL_NN4X1,MDL_NN4X2 + $ ,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6,MDL_RD6X3,MDL_RD6X6 + $ ,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6,MDL_RU3X3,MDL_RU3X6 + $ ,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2,MDL_UU2X1,MDL_UU2X2 + $ ,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2,MDL_TD3X3,MDL_TE3X3 + $ ,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3,MDL_BB + $ ,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 $ ,MDL_I10X36,MDL_CONJG__RD3X6,MDL_I100X33,MDL_I100X36 $ ,MDL_CONJG__RD6X6,MDL_I100X63,MDL_I100X66,MDL_CONJG__RL3X6 $ ,MDL_I101X33,MDL_I101X36,MDL_CONJG__RL6X6,MDL_I101X63 diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. 
-OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = 
$(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an 
external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq ($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is 
implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += 
$(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... 
###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread 
$(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o +$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. -I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used 
#802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link 
fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: $(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += 
$(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to 
`__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell 
dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and 
runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) 
$(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 
#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/makefile b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/makefile +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else +CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/Parameters_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.mad/src/Parameters_MSSM_SLHA2.h index c192a660ab..16bcbcdc84 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/src/Parameters_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/Parameters_MSSM_SLHA2.h @@ -58,8 +58,7 @@ namespace mg5amcCpu // (none) // Model parameters dependent on aS - //double mdl_sqrt__aS, G; // now computed event-by-event (running alphas #373) - //cxsmpl mdl_G__exp__2; // now computed event-by-event (running alphas #373) + //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //cxsmpl GC_6, GC_55, GC_57, GC_90; // now computed event-by-event (running alphas #373) @@ -750,7 +749,7 @@ namespace mg5amcCpu // Model parameters dependent on aS //constexpr double mdl_sqrt__aS = constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373) //constexpr double G = 2. 
* mdl_sqrt__aS * constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373) - //constexpr cxsmpl mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //constexpr cxsmpl GC_6 = -G; // now computed event-by-event (running alphas #373) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/cudacpp_src.mk b/epochX/cudacpp/susy_gg_t1t1.mad/src/cudacpp_src.mk index 4fe7fcd011..ebb1d4d1fa 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index e4634819f9..2f5dfd6362 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.132 s +1 processes with 6 diagrams generated in 0.120 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT @@ -583,7 +583,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.198 s +ALOHA: aloha creates 3 routines in 0.168 s VVV1 VSS1 VSS1 @@ -599,7 +599,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
quit -real 0m1.420s -user 0m1.352s -sys 0m0.061s -Code generation completed in 1 seconds +real 0m1.256s +user 0m1.176s +sys 0m0.072s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an 
external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/Parameters_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.sa/src/Parameters_MSSM_SLHA2.h index c192a660ab..16bcbcdc84 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/src/Parameters_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/Parameters_MSSM_SLHA2.h @@ -58,8 +58,7 @@ namespace mg5amcCpu // (none) // Model parameters dependent on aS - //double mdl_sqrt__aS, G; // now computed event-by-event (running alphas #373) - //cxsmpl mdl_G__exp__2; // now computed event-by-event (running alphas #373) + //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //cxsmpl GC_6, GC_55, GC_57, GC_90; // now computed event-by-event (running alphas #373) @@ -750,7 +749,7 @@ namespace mg5amcCpu // Model 
parameters dependent on aS //constexpr double mdl_sqrt__aS = constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373) //constexpr double G = 2. * mdl_sqrt__aS * constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373) - //constexpr cxsmpl mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //constexpr cxsmpl GC_6 = -G; // now computed event-by-event (running alphas #373) diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/cudacpp_src.mk b/epochX/cudacpp/susy_gg_t1t1.sa/src/cudacpp_src.mk index 4fe7fcd011..ebb1d4d1fa 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 7491cac3c5..5e673473ee 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.126 s +1 processes with 3 diagrams generated in 0.116 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -593,16 +593,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.110 s +Wrote files for 10 helas calls in 0.103 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.128 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.141 s +ALOHA: aloha creates 4 routines in 0.124 s VVV1 FFV1 FFV1 @@ -638,9 +638,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.809s -user 0m2.561s -sys 0m0.237s +real 0m2.634s +user 0m2.369s +sys 0m0.243s Code generation completed in 3 seconds ************************************************************ * * @@ -666,8 +666,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -696,8 +696,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/input.inc b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/input.inc index 286a92e533..59b3e6e7f2 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/input.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/input.inc @@ -2,29 +2,30 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - DOUBLE PRECISION MDL_SQRT__AS,MDL_MSD5,MDL_MSD2,MDL_MSU5 - $ ,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2,MDL_RMQ22X2 - $ ,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2,MDL_CONJG__RN3X3 - $ ,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1,MDL_RN3X3,MDL_RN1X1 - $ ,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1,MDL_I98X11,MDL_I97X11 - $ ,MDL_I96X11,MDL_I93X11,MDL_I92X11,MDL_I87X11,MDL_I82X11 - $ ,MDL_I74X11,MDL_I6X44,MDL_I5X11,MDL_I53X11,MDL_I52X44 - $ ,MDL_I51X11,MDL_I39X11,MDL_I31X11,MDL_I26X44,MDL_I25X11 - $ ,MDL_I12X11,MDL_I102X44,MDL_I101X44,MDL_I100X44,MDL_CKM3X3 - $ ,MDL_ATAN__TB,MDL_BETA,MDL_CW,MDL_MZ__EXP__2,MDL_CW__EXP__2 - $ 
,MDL_SW,MDL_COS__BETA,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2 - $ ,MDL_COS__ALP,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD - $ ,MDL_VU,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3 - $ ,MDL_RRD3X6,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP - $ ,MDL_RMUH,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1 - $ ,MDL_RME23X3,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3 - $ ,MDL_MHD2,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1 - $ ,MDL_RMU23X3,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4 - $ ,MDL_RNN2X1,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1 - $ ,MDL_RNN3X2,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2 - $ ,MDL_RNN4X3,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3 - $ ,MDL_RRL3X6,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1 - $ ,AS,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 + DOUBLE PRECISION MDL_SQRT__AS,MDL_G__EXP__2,MDL_MSD5,MDL_MSD2 + $ ,MDL_MSU5,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2 + $ ,MDL_RMQ22X2,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2 + $ ,MDL_CONJG__RN3X3,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1 + $ ,MDL_RN3X3,MDL_RN1X1,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1 + $ ,MDL_I98X11,MDL_I97X11,MDL_I96X11,MDL_I93X11,MDL_I92X11 + $ ,MDL_I87X11,MDL_I82X11,MDL_I74X11,MDL_I6X44,MDL_I5X11 + $ ,MDL_I53X11,MDL_I52X44,MDL_I51X11,MDL_I39X11,MDL_I31X11 + $ ,MDL_I26X44,MDL_I25X11,MDL_I12X11,MDL_I102X44,MDL_I101X44 + $ ,MDL_I100X44,MDL_CKM3X3,MDL_ATAN__TB,MDL_BETA,MDL_CW + $ ,MDL_MZ__EXP__2,MDL_CW__EXP__2,MDL_SW,MDL_COS__BETA + $ ,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2,MDL_COS__ALP + $ ,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD,MDL_VU + $ ,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3,MDL_RRD3X6 + $ ,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP,MDL_RMUH + $ ,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1,MDL_RME23X3 + $ ,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3,MDL_MHD2 + $ ,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1,MDL_RMU23X3 + $ ,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4,MDL_RNN2X1 + $ 
,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1,MDL_RNN3X2 + $ ,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2,MDL_RNN4X3 + $ ,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3,MDL_RRL3X6 + $ ,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1,AS + $ ,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 $ ,MDL_RTU3X3,MDL_RUU1X1,MDL_RUU1X2,MDL_RUU2X1,MDL_RUU2X2 $ ,MDL_RMNS1X1,MDL_RMNS2X2,MDL_RMNS3X3,MDL_RRU1X1,MDL_RRU2X2 $ ,MDL_RRU3X3,MDL_RRU3X6,MDL_RRU4X4,MDL_RRU5X5,MDL_RRU6X3 @@ -32,29 +33,30 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ ,MDL_RVV1X2,MDL_RVV2X1,MDL_RVV2X2,MDL_RYD3X3,MDL_RYE3X3 $ ,MDL_RYU3X3 - COMMON/PARAMS_R/ MDL_SQRT__AS,MDL_MSD5,MDL_MSD2,MDL_MSU5 - $ ,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2,MDL_RMQ22X2 - $ ,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2,MDL_CONJG__RN3X3 - $ ,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1,MDL_RN3X3,MDL_RN1X1 - $ ,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1,MDL_I98X11,MDL_I97X11 - $ ,MDL_I96X11,MDL_I93X11,MDL_I92X11,MDL_I87X11,MDL_I82X11 - $ ,MDL_I74X11,MDL_I6X44,MDL_I5X11,MDL_I53X11,MDL_I52X44 - $ ,MDL_I51X11,MDL_I39X11,MDL_I31X11,MDL_I26X44,MDL_I25X11 - $ ,MDL_I12X11,MDL_I102X44,MDL_I101X44,MDL_I100X44,MDL_CKM3X3 - $ ,MDL_ATAN__TB,MDL_BETA,MDL_CW,MDL_MZ__EXP__2,MDL_CW__EXP__2 - $ ,MDL_SW,MDL_COS__BETA,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2 - $ ,MDL_COS__ALP,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD - $ ,MDL_VU,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3 - $ ,MDL_RRD3X6,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP - $ ,MDL_RMUH,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1 - $ ,MDL_RME23X3,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3 - $ ,MDL_MHD2,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1 - $ ,MDL_RMU23X3,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4 - $ ,MDL_RNN2X1,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1 - $ ,MDL_RNN3X2,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2 - $ ,MDL_RNN4X3,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3 - $ 
,MDL_RRL3X6,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1 - $ ,AS,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 + COMMON/PARAMS_R/ MDL_SQRT__AS,MDL_G__EXP__2,MDL_MSD5,MDL_MSD2 + $ ,MDL_MSU5,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2 + $ ,MDL_RMQ22X2,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2 + $ ,MDL_CONJG__RN3X3,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1 + $ ,MDL_RN3X3,MDL_RN1X1,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1 + $ ,MDL_I98X11,MDL_I97X11,MDL_I96X11,MDL_I93X11,MDL_I92X11 + $ ,MDL_I87X11,MDL_I82X11,MDL_I74X11,MDL_I6X44,MDL_I5X11 + $ ,MDL_I53X11,MDL_I52X44,MDL_I51X11,MDL_I39X11,MDL_I31X11 + $ ,MDL_I26X44,MDL_I25X11,MDL_I12X11,MDL_I102X44,MDL_I101X44 + $ ,MDL_I100X44,MDL_CKM3X3,MDL_ATAN__TB,MDL_BETA,MDL_CW + $ ,MDL_MZ__EXP__2,MDL_CW__EXP__2,MDL_SW,MDL_COS__BETA + $ ,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2,MDL_COS__ALP + $ ,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD,MDL_VU + $ ,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3,MDL_RRD3X6 + $ ,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP,MDL_RMUH + $ ,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1,MDL_RME23X3 + $ ,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3,MDL_MHD2 + $ ,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1,MDL_RMU23X3 + $ ,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4,MDL_RNN2X1 + $ ,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1,MDL_RNN3X2 + $ ,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2,MDL_RNN4X3 + $ ,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3,MDL_RRL3X6 + $ ,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1,AS + $ ,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 $ ,MDL_RTU3X3,MDL_RUU1X1,MDL_RUU1X2,MDL_RUU2X1,MDL_RUU2X2 $ ,MDL_RMNS1X1,MDL_RMNS2X2,MDL_RMNS3X3,MDL_RRU1X1,MDL_RRU2X2 $ ,MDL_RRU3X3,MDL_RRU3X6,MDL_RRU4X4,MDL_RRU5X5,MDL_RRU6X3 @@ -63,18 +65,18 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ ,MDL_RYU3X3 - DOUBLE COMPLEX MDL_G__EXP__2,MDL_MD21X1,MDL_MD22X2,MDL_MD23X3 - $ ,MDL_ME21X1,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2 - 
$ ,MDL_ML23X3,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1 - $ ,MDL_MU22X2,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3 - $ ,MDL_NN1X1,MDL_NN1X2,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2 - $ ,MDL_NN2X3,MDL_NN2X4,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4 - $ ,MDL_NN4X1,MDL_NN4X2,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6 - $ ,MDL_RD6X3,MDL_RD6X6,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6 - $ ,MDL_RU3X3,MDL_RU3X6,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2 - $ ,MDL_UU2X1,MDL_UU2X2,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2 - $ ,MDL_TD3X3,MDL_TE3X3,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3 - $ ,MDL_BB,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 + DOUBLE COMPLEX MDL_MD21X1,MDL_MD22X2,MDL_MD23X3,MDL_ME21X1 + $ ,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2,MDL_ML23X3 + $ ,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1,MDL_MU22X2 + $ ,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3,MDL_NN1X1,MDL_NN1X2 + $ ,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2,MDL_NN2X3,MDL_NN2X4 + $ ,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4,MDL_NN4X1,MDL_NN4X2 + $ ,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6,MDL_RD6X3,MDL_RD6X6 + $ ,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6,MDL_RU3X3,MDL_RU3X6 + $ ,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2,MDL_UU2X1,MDL_UU2X2 + $ ,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2,MDL_TD3X3,MDL_TE3X3 + $ ,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3,MDL_BB + $ ,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 $ ,MDL_I10X36,MDL_CONJG__RD3X6,MDL_I100X33,MDL_I100X36 $ ,MDL_CONJG__RD6X6,MDL_I100X63,MDL_I100X66,MDL_CONJG__RL3X6 $ ,MDL_I101X33,MDL_I101X36,MDL_CONJG__RL6X6,MDL_I101X63 @@ -148,18 +150,18 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ ,MDL_CONJG__UU2X2,MDL_CONJG__VV1X1,MDL_CONJG__VV1X2 $ ,MDL_CONJG__VV2X1,MDL_CONJG__VV2X2,MDL_CONJG__MUH - COMMON/PARAMS_C/ MDL_G__EXP__2,MDL_MD21X1,MDL_MD22X2,MDL_MD23X3 - $ ,MDL_ME21X1,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2 - $ ,MDL_ML23X3,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1 - $ 
,MDL_MU22X2,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3 - $ ,MDL_NN1X1,MDL_NN1X2,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2 - $ ,MDL_NN2X3,MDL_NN2X4,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4 - $ ,MDL_NN4X1,MDL_NN4X2,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6 - $ ,MDL_RD6X3,MDL_RD6X6,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6 - $ ,MDL_RU3X3,MDL_RU3X6,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2 - $ ,MDL_UU2X1,MDL_UU2X2,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2 - $ ,MDL_TD3X3,MDL_TE3X3,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3 - $ ,MDL_BB,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 + COMMON/PARAMS_C/ MDL_MD21X1,MDL_MD22X2,MDL_MD23X3,MDL_ME21X1 + $ ,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2,MDL_ML23X3 + $ ,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1,MDL_MU22X2 + $ ,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3,MDL_NN1X1,MDL_NN1X2 + $ ,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2,MDL_NN2X3,MDL_NN2X4 + $ ,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4,MDL_NN4X1,MDL_NN4X2 + $ ,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6,MDL_RD6X3,MDL_RD6X6 + $ ,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6,MDL_RU3X3,MDL_RU3X6 + $ ,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2,MDL_UU2X1,MDL_UU2X2 + $ ,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2,MDL_TD3X3,MDL_TE3X3 + $ ,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3,MDL_BB + $ ,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 $ ,MDL_I10X36,MDL_CONJG__RD3X6,MDL_I100X33,MDL_I100X36 $ ,MDL_CONJG__RD6X6,MDL_I100X63,MDL_I100X66,MDL_CONJG__RL3X6 $ ,MDL_I101X33,MDL_I101X36,MDL_CONJG__RL6X6,MDL_I101X63 diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. 
-OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = 
$(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an 
external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq ($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is 
implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += 
$(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... 
###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread 
$(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o +$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. -I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used 
#802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link 
fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: $(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += 
$(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to 
`__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell 
dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and 
runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) 
$(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 
#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/makefile b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/makefile index 22517d0cc6..6a21300f7a 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/makefile +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/makefile @@ -58,13 +58,15 @@ endif LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias CUDACPP_MAKEFILE=cudacpp.mk -CUDACPP_COMMONLIB=mg5amc_common processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ifeq ($(BACKEND),cuda) +CUDACPP_COMMONLIB=mg5amc_common_cuda CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda else ifeq ($(BACKEND),hip) +CUDACPP_COMMONLIB=mg5amc_common_hip CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip else +CUDACPP_COMMONLIB=mg5amc_common_cpp CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp endif diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/Parameters_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.mad/src/Parameters_MSSM_SLHA2.h index 19eaafe599..6b9cead062 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/src/Parameters_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_tt.mad/src/Parameters_MSSM_SLHA2.h @@ -58,8 +58,7 @@ namespace mg5amcCpu // (none) // Model parameters dependent on aS - //double mdl_sqrt__aS, G; // now computed event-by-event (running alphas #373) - //cxsmpl mdl_G__exp__2; // now computed event-by-event (running alphas #373) + //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //cxsmpl GC_6, GC_51; // now computed event-by-event (running alphas #373) @@ -750,7 +749,7 @@ namespace mg5amcCpu // Model parameters dependent on aS //constexpr double mdl_sqrt__aS = constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373) //constexpr double G = 2. 
* mdl_sqrt__aS * constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373) - //constexpr cxsmpl mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //constexpr cxsmpl GC_6 = -G; // now computed event-by-event (running alphas #373) diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/cudacpp_src.mk b/epochX/cudacpp/susy_gg_tt.mad/src/cudacpp_src.mk index 4fe7fcd011..ebb1d4d1fa 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/susy_gg_tt.mad/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 8d75a175ae..ba9e415fc7 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -51,14 +51,17 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F import model MSSM_SLHA2 +INFO: load particles +INFO: load vertices +DEBUG: model prefixing takes 0.8075113296508789  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -554,7 +557,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.136 s +1 processes with 3 diagrams generated in 0.107 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -582,7 +585,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.129 s VVV1 FFV1 FFV1 @@ -597,7 +600,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m1.370s -user 0m1.281s -sys 0m0.073s -Code generation completed in 2 seconds +real 0m2.342s +user 0m2.232s +sys 0m0.076s +Code generation completed in 3 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk index f047530438..f8d86fd493 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk @@ -54,65 +54,6 @@ endif #------------------------------------------------------------------------------- -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) - override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) - override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) - override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else - override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) - -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external 
gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) @@ -340,6 +281,69 @@ endif #------------------------------------------------------------------------------- +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq 
($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP # PowerPC-specific CXX compiler flags (being reviewed) @@ -606,27 +610,25 @@ override RUNTIME = #=== Makefile TARGETS and build rules below #=============================================================================== -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -ifneq ($(GPUCC),) - gpu_main=$(BUILDDIR)/gcheck.exe - fgpu_main=$(BUILDDIR)/fgcheck.exe +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_main= - fgpu_main= + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif -testmain=$(BUILDDIR)/runTest.exe - # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) # First target (default goal) -ifneq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_main) $(fgpu_main) $(if $(GTESTLIBS),$(testmain)) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(if $(GTESTLIBS),$(cxx_testmain)) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_main) $(fcxx_main) $(if $(GTESTLIBS),$(testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(if $(GTESTLIBS),$(gpu_testmain)) endif # Target (and 
build options): debug @@ -643,30 +645,30 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel[_$(GPUSUFFIX)].o (no fast math, see #117 and #516) +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) # Added edgecase for HIP compilation ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math endif -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# Apply special build flags only to check_sa[_$(GPUSUFFIX)].o and (Cu|Hip)randRandomNumberKernel[_$(GPUSUFFIX)].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUDA_INC) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) endif ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIP_INC) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) @@ -679,21 +681,21 @@ endif # Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) # This patch does remove the warning, but I prefer to keep it disabled for the moment... ###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option ###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option ###endif ###endif #### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto #### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) -$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -718,8 +720,8 @@ processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') ###$(info processid_short=$(processid_short)) MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o ifneq ($(GPUCC),) MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) @@ -728,8 +730,8 @@ gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/ endif # Target (and build rules): C++ and CUDA/HIP shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) @@ -756,63 +758,63 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(gpu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(gpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_main): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o 
+$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- # Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%.o : %.f *.inc +$(BUILDDIR)/%_fortran.o : %.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ # Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%.o : %.f *.inc +###$(BUILDDIR)/%_fortran.o : %.f *.inc ### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi ### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi ### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ # Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) endif ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fgpu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fgpu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_fcheckmain): LIBFLAGS += -lintlc # 
compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') endif ifeq ($(UNAME_S),Darwin) -$(fgpu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 endif -$(fgpu_main): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fgpu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif endif @@ -820,83 +822,94 @@ endif # Target (and build rules): test objects and test executable ifeq ($(GPUCC),) -$(BUILDDIR)/testxxx.o: 
$(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions else $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions endif ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) ifeq ($(GPUCC),) -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests else $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the 
CUDA/HIP miscellaneous tests +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests endif endif ifeq ($(GPUCC),) -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o else $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +$(gpu_testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif -$(testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -$(testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o endif -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -###$(testmain): LIBFLAGS += -lgtest_main # no longer necessary 
since we added main() to testxxx.cc +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) ifneq ($(OMPFLAGS),) ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) else -$(testmain): LIBFLAGS += -lgomp +$(cxx_testmain): LIBFLAGS += -lgomp +endif endif endif # Test quadmath in testmisc.cc tests for constexpr_math #627 -###$(testmain): LIBFLAGS += -lquadmath +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif # Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest.o and runTest_$(GPUSUFFIX).o) -$(testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell 
dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 else @@ -1051,51 +1064,50 @@ endif #------------------------------------------------------------------------------- -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] -# [FIXME: SHOULD CHANGE THE TARGET NAME "check" THAT HAS NOTHING TO DO WITH "check.exe"] -ifneq ($(GPUCC),) -check: runTest cmpFGcheck -else -check: runTest cmpFcheck -endif +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] +# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck -# Target: runTest (run the C++ test executable runTest.exe) +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA/HIP standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone 
executable - with CUDA/HIP MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) cmpFcheck: all.$(TAG) @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA/HIP and Fortran with CUDA/HIP MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/GPU) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif # Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) cuda-memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/Parameters_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.sa/src/Parameters_MSSM_SLHA2.h index 19eaafe599..6b9cead062 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/src/Parameters_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_tt.sa/src/Parameters_MSSM_SLHA2.h @@ -58,8 +58,7 @@ namespace mg5amcCpu // (none) // Model parameters dependent on aS - //double mdl_sqrt__aS, G; // now computed event-by-event (running alphas #373) - //cxsmpl<double> mdl_G__exp__2; // now computed event-by-event (running alphas #373) + //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //cxsmpl<double> GC_6, GC_51; // now computed event-by-event (running alphas #373) @@ -750,7 +749,7 @@ namespace mg5amcCpu // Model parameters dependent on
aS //constexpr double mdl_sqrt__aS = constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373) //constexpr double G = 2. * mdl_sqrt__aS * constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373) - //constexpr cxsmpl<double> mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //constexpr cxsmpl<double> GC_6 = -G; // now computed event-by-event (running alphas #373) diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/cudacpp_src.mk b/epochX/cudacpp/susy_gg_tt.sa/src/cudacpp_src.mk index 4fe7fcd011..ebb1d4d1fa 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/susy_gg_tt.sa/src/cudacpp_src.mk @@ -98,7 +98,11 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) .DEFAULT_GOAL := all.$(TAG) @@ -126,7 +130,7 @@ $(LIBDIR)/.build.$(TAG): #------------------------------------------------------------------------------- # Generic target and build rules: objects from C++ compilation -$(BUILDDIR)/%.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) +$(BUILDDIR)/%_cpp.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) @if [ !
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ @@ -139,22 +143,22 @@ endif #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, read_slha.o) -ifneq ($(GPUCC),) - gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_$(GPUSUFFIX).o) +cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +ifeq ($(GPUCC),) + cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_cpp.o) else - cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2.o) + gpu_objects=$(addprefix $(BUILDDIR)/, Parameters_MSSM_SLHA2_$(GPUSUFFIX).o) endif # Target (and build rules): common (src) library -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) - @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) -else +ifeq ($(GPUCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) +else +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(gpu_objects) + @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(GPUCC) -shared -o $@ $(cxx_objects) $(gpu_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- From 6a6e2b9c0a9808b2cd3cd8a4325fbdf09112b5ea Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 22:29:51 +0200 Subject: [PATCH 28/48] [jtmk2] prepare to merge the latest jtmk: go back to the last common codegen logs GITMB=$(git merge-base --fork-point jtmk HEAD) echo $GITMB 522069ab8168d502ac1a9041956e0f949eedcce6 git checkout $GITMB $(git ls-tree --name-only $GITMB */CODEGEN*txt) --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 30 ++++---- .../CODEGEN_cudacpp_ee_mumu_log.txt | 20 +++--- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 32 ++++----- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 18 ++--- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 36 +++++----- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 34 ++++----- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 18 ++--- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 34 ++++----- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 18 ++--- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 36 +++++----- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 20 +++--- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 34 ++++----- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 20 +++--- .../CODEGEN_mad_heft_gg_bb_log.txt | 28 ++++---- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 60 ++-------------- .../CODEGEN_mad_pp_tt012j_log.txt | 72 +++++++++---------- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 34 ++++----- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 56 +++------------ .../CODEGEN_mad_susy_gg_t1t1_log.txt | 28 ++++---- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 16 ++--- .../CODEGEN_mad_susy_gg_tt_log.txt | 28 ++++---- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 19 +++-- 22 files changed, 301 insertions(+), 390 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt 
b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 9a5327e985..e061610590 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005074739456176758  +DEBUG: model prefixing takes 0.0058252811431884766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.004 s +1 processes with 2 diagrams generated in 0.005 s Total: 1 processes with 2 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,19 +194,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.099 s +Wrote files for 8 helas calls in 0.105 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.190 s +ALOHA: aloha creates 3 routines in 0.212 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.240 s +ALOHA: aloha creates 7 routines in 0.269 s FFV1 FFV1 FFV2 @@ -250,9 +250,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.851s -user 0m1.596s -sys 0m0.235s +real 0m1.972s +user 0m1.747s +sys 0m0.214s Code generation completed in 2 seconds ************************************************************ * * @@ -278,8 +278,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -308,8 +308,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index c84ee401ac..9d92f3f8d9 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00506591796875  +DEBUG: model prefixing takes 0.005830049514770508  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.004 s +1 processes with 2 diagrams generated in 0.005 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Load PLUGIN.CUDACPP_OUTPUT @@ -177,14 +177,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.251 s +ALOHA: aloha creates 4 routines in 0.283 s FFV1 FFV1 FFV2 @@ -203,7 +203,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
quit -real 0m0.656s -user 0m0.570s -sys 0m0.059s -Code generation completed in 0 seconds +real 0m0.729s +user 0m0.624s +sys 0m0.060s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index d7e99ccf73..cc77de1073 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005176544189453125  +DEBUG: model prefixing takes 0.005805015563964844  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,16 +194,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.098 s +Wrote files for 10 helas calls in 0.107 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.135 s +ALOHA: aloha creates 2 routines in 0.154 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.121 s +ALOHA: aloha creates 4 routines in 0.140 s VVV1 FFV1 FFV1 @@ -239,10 +239,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.651s -user 0m1.428s -sys 0m0.220s -Code generation completed in 1 seconds +real 0m1.782s +user 0m1.573s +sys 0m0.205s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * @@ -267,8 +267,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -297,8 +297,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index eb517b045c..fc60349507 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005257606506347656  +DEBUG: model prefixing takes 0.005868434906005859  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -183,7 +183,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.146 s +ALOHA: aloha creates 2 routines in 0.154 s VVV1 FFV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.544s -user 0m0.492s -sys 0m0.047s -Code generation completed in 1 seconds +real 0m0.562s +user 0m0.507s +sys 0m0.052s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 85093dfd4e..b9564eac61 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004948139190673828  +DEBUG: model prefixing takes 0.005810976028442383  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams add process g g > t t~ g INFO: Checking for minimal orders which gives processes. @@ -163,7 +163,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.019 s +1 processes with 16 diagrams generated in 0.021 s Total: 2 processes with 19 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -188,7 +188,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -205,7 +205,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -220,15 +220,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.041 s -Wrote files for 46 helas calls in 0.239 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.046 s +Wrote files for 46 helas calls in 0.258 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.304 s +ALOHA: aloha creates 5 routines in 0.348 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -236,7 +236,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.286 s +ALOHA: aloha creates 10 routines in 0.331 s VVV1 VVV1 
FFV1 @@ -285,9 +285,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.217s -user 0m1.962s -sys 0m0.249s +real 0m2.416s +user 0m2.182s +sys 0m0.234s Code generation completed in 2 seconds ************************************************************ * * @@ -313,8 +313,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -343,8 +343,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index d44a6475ba..fddf911cda 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00538945198059082  +DEBUG: model prefixing takes 0.0058176517486572266  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.023 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.036 s -Wrote files for 36 helas calls in 0.145 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s +Wrote files for 36 helas calls in 0.159 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.306 s +ALOHA: aloha creates 5 routines in 0.349 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ 
-209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.286 s +ALOHA: aloha creates 10 routines in 0.332 s VVV1 VVV1 FFV1 @@ -254,10 +254,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.113s -user 0m1.886s -sys 0m0.223s -Code generation completed in 2 seconds +real 0m3.337s +user 0m2.058s +sys 0m0.239s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * @@ -282,8 +282,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -312,8 +312,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. 
Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 87eef8bde0..7d0df1bb94 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00493621826171875  +DEBUG: model prefixing takes 0.005813121795654297  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.023 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.036 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.303 s +ALOHA: aloha creates 5 routines in 0.347 s VVV1 VVV1 FFV1 @@ -206,7 +206,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m0.746s -user 0m0.690s -sys 0m0.048s +real 0m0.824s +user 0m0.761s +sys 0m0.060s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index d88bbb63fe..70ce80900e 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005059242248535156  +DEBUG: model prefixing takes 0.005890607833862305  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.154 s +1 processes with 123 diagrams generated in 0.168 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.409 s -Wrote files for 222 helas calls in 0.662 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.453 s +Wrote files for 222 helas calls in 0.738 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.310 s +ALOHA: aloha creates 5 routines in 0.356 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines 
ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.305 s +ALOHA: aloha creates 10 routines in 0.338 s VVV1 VVV1 FFV1 @@ -257,10 +257,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.178s -user 0m2.929s -sys 0m0.239s -Code generation completed in 3 seconds +real 0m3.464s +user 0m3.217s +sys 0m0.241s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * @@ -285,8 +285,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -315,8 +315,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index bfb761500c..210673a0f2 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052258968353271484  +DEBUG: model prefixing takes 0.005852460861206055  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.156 s +1 processes with 123 diagrams generated in 0.168 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.410 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.452 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.300 s +ALOHA: aloha creates 5 routines in 0.346 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.393s -user 0m1.323s -sys 0m0.058s +real 0m1.531s +user 0m1.465s +sys 0m0.053s Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index a2d6829e85..a28e2c3783 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005289793014526367  +DEBUG: model prefixing takes 0.005841732025146484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.846 s +1 processes with 1240 diagrams generated in 2.008 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,9 +178,9 @@ INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1630 term in 7s. Introduce 3030 contraction +INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,15 +195,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.350 s -Wrote files for 2281 helas calls in 17.442 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 7.025 s +Wrote files for 2281 helas calls in 19.714 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.299 s +ALOHA: aloha creates 5 routines in 0.340 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -211,7 +211,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.293 s +ALOHA: aloha creates 10 routines in 0.333 s VVV1 VVV1 FFV1 @@ -259,10 +259,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m27.829s -user 0m27.337s -sys 0m0.391s -Code generation completed in 27 seconds +real 0m31.105s +user 0m30.577s +sys 0m0.429s +Code generation completed in 31 seconds ************************************************************ * * * W E L C O M E to * @@ -287,8 +287,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -317,8 +317,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index ce14eef7e6..d1d3da38ee 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00564885139465332  +DEBUG: model prefixing takes 0.0058422088623046875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.834 s +1 processes with 1240 diagrams generated in 2.006 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.228 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 7.034 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.323 s +ALOHA: aloha creates 5 routines in 0.373 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m12.339s -user 0m12.185s -sys 0m0.102s -Code generation completed in 13 seconds +real 0m13.957s +user 0m13.797s +sys 0m0.108s +Code generation completed in 14 seconds diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 5a014a7cb3..ac6d206671 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0051610469818115234  +DEBUG: model prefixing takes 0.005831003189086914  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.083 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -201,7 +201,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -218,7 +218,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -233,17 +233,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s -Wrote files for 32 helas calls in 0.218 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.033 s +Wrote files for 32 helas calls in 0.233 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.139 s +ALOHA: aloha creates 2 routines in 0.155 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.124 s +ALOHA: aloha creates 4 routines in 0.139 s FFV1 FFV1 FFV1 @@ -296,9 +296,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.900s -user 0m1.650s -sys 0m0.246s +real 0m2.021s +user 0m1.798s +sys 0m0.224s Code generation completed in 2 seconds ************************************************************ * * @@ -324,8 +324,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -354,8 +354,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index c1507b6837..0af1cee08d 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004972219467163086  +DEBUG: model prefixing takes 0.0058209896087646484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.083 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -210,12 +210,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.136 s +ALOHA: aloha creates 2 routines in 0.154 s FFV1 FFV1 FFV1 @@ -231,7 +231,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m0.639s -user 0m0.570s -sys 0m0.059s -Code generation completed in 1 seconds +real 0m0.686s +user 0m0.626s +sys 0m0.056s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index e752892fe5..661f542f0e 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -127,7 +127,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.005 s +1 processes with 4 diagrams generated in 0.006 s Total: 1 processes with 4 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -150,7 +150,7 @@ INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Creating files in directory P1_gg_bbx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -166,20 +166,20 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Finding symmetric diagrams for subprocess group gg_bbx Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s -Wrote files for 12 helas calls in 0.104 s +Wrote files for 12 helas calls in 0.110 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.249 s +ALOHA: aloha creates 4 routines in 0.280 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.233 s +ALOHA: aloha creates 8 routines in 0.263 s VVS3 VVV1 FFV1 @@ -217,9 +217,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.898s -user 0m1.654s -sys 0m0.240s +real 0m2.031s +user 0m1.827s +sys 0m0.208s Code generation completed in 2 seconds ************************************************************ * * @@ -245,8 +245,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -275,8 +275,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index af64a247ee..0ff1f087b1 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,54 +62,6 @@ set auto_convert_model T save options auto_convert_model save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft -INFO: download model from https://madgraph.mi.infn.it/Downloads/models/heft.tgz to the following directory: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/models  ---2024-05-15 12:50:52-- https://madgraph.mi.infn.it/Downloads/models/heft.tgz -Resolving madgraph.mi.infn.it (madgraph.mi.infn.it)... 192.135.21.75 -Connecting to madgraph.mi.infn.it (madgraph.mi.infn.it)|192.135.21.75|:443... connected. -HTTP request sent, awaiting response... 200 OK -Length: 50876 (50K) [application/x-gzip] -Saving to: ‘tmp.tgz’ - - 0K .......... .......... .......... .......... ......... 
100% 2.79M=0.02s - -2024-05-15 12:50:52 (2.79 MB/s) - ‘tmp.tgz’ saved [50876/50876] - -heft/ -heft/write_param_card.py -heft/restrict_ckm.dat -heft/couplings.py -heft/HEFT_UFO.log -heft/lorentz.py -heft/__init__.py -heft/__pycache__/ -heft/particles.py -heft/object_library.py -heft/restrict_default.dat -heft/restrict_zeromass_ckm.dat -heft/restrict_no_b_mass.dat -heft/function_library.py -heft/parameters.py -heft/py3_model.pkl -heft/coupling_orders.py -heft/restrict_no_tau_mass.dat -heft/vertices.py -heft/restrict_no_masses.dat -heft/__pycache__/write_param_card.cpython-311.pyc -heft/__pycache__/parameters.cpython-311.pyc -heft/__pycache__/function_library.cpython-311.pyc -heft/__pycache__/coupling_orders.cpython-311.pyc -heft/__pycache__/object_library.cpython-311.pyc -heft/__pycache__/couplings.cpython-311.pyc -heft/__pycache__/particles.cpython-311.pyc -heft/__pycache__/vertices.cpython-311.pyc -heft/__pycache__/lorentz.cpython-311.pyc -heft/__pycache__/__init__.cpython-311.pyc -INFO: reload from .py file -INFO: load particles -INFO: load vertices -WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  -WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  -DEBUG: model prefixing takes 0.005690574645996094  INFO: Restrict model heft with file models/heft/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -205,7 +157,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.249 s +ALOHA: aloha creates 4 routines in 0.283 s VVS3 VVV1 FFV1 @@ -222,7 +174,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m1.060s -user 0m0.751s -sys 0m0.075s +real 0m0.681s +user 0m0.624s +sys 0m0.053s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index aa3a1e93aa..e974458719 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005166053771972656  +DEBUG: model prefixing takes 0.0058176517486572266  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.029 s +5 processes with 7 diagrams generated in 0.031 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.134 s +13 processes with 76 diagrams generated in 0.145 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.789 s +65 processes with 1119 diagrams generated in 1.960 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -500,7 +500,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -517,7 +517,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -534,7 +534,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -551,7 +551,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -568,7 +568,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -585,7 +585,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -602,7 +602,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -619,7 +619,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -636,7 +636,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -653,7 +653,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -670,7 +670,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -687,7 +687,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -704,7 +704,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -721,7 +721,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -738,7 +738,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -755,7 +755,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -772,7 +772,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -789,7 +789,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -804,15 +804,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.226 s -Wrote files for 810 helas calls in 3.151 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.378 s +Wrote files for 810 helas calls in 3.470 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.313 s +ALOHA: aloha creates 5 routines in 0.362 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -820,7 +820,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.289 s +ALOHA: aloha creates 10 routines in 0.336 s VVV1 VVV1 FFV1 @@ -1030,10 +1030,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m8.549s -user 0m8.032s -sys 0m0.448s -Code generation completed in 8 seconds +real 0m9.362s +user 0m8.883s +sys 0m0.445s +Code generation completed in 9 seconds ************************************************************ * * * W E L C O M E to * @@ -1058,8 +1058,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -1088,8 +1088,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 301164c091..15ffc09ea7 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.122955322265625  +DEBUG: model prefixing takes 0.1477065086364746  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify 
coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.543 s +1 processes with 72 diagrams generated in 3.958 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +115,7 @@ INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 INFO: Creating files in directory P1_gg_ttxttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -130,15 +130,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx -Generated helas calls for 1 subprocesses (72 diagrams) in 0.179 s -Wrote files for 119 helas calls in 0.395 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.200 s +Wrote files for 119 helas calls in 0.443 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.299 s +ALOHA: aloha creates 5 routines in 0.336 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -146,7 +146,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.317 s +ALOHA: aloha creates 10 routines in 0.351 s VVV5 VVV5 FFV1 @@ -191,10 +191,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m6.544s -user 0m6.256s -sys 0m0.262s -Code generation completed in 7 seconds +real 0m7.238s +user 0m6.991s +sys 0m0.229s +Code generation completed in 8 seconds ************************************************************ * * * W E L C O M E to * @@ -219,8 +219,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -249,8 +249,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 298e9be9d9..7115deec1c 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -62,41 +62,6 @@ set auto_convert_model T save options auto_convert_model save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t -INFO: download model from http://feynrules.irmp.ucl.ac.be/raw-attachment/wiki/SMEFT/SMEFTsim_topU3l_MwScheme_UFO.tar.gz to the following directory: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/models  ---2024-05-15 12:51:31-- http://feynrules.irmp.ucl.ac.be/raw-attachment/wiki/SMEFT/SMEFTsim_topU3l_MwScheme_UFO.tar.gz -Resolving feynrules.irmp.ucl.ac.be (feynrules.irmp.ucl.ac.be)... 130.104.48.109 -Connecting to feynrules.irmp.ucl.ac.be (feynrules.irmp.ucl.ac.be)|130.104.48.109|:80... connected. -HTTP request sent, awaiting response... 
200 Ok -Length: 80562 (79K) [application/x-tar] -Saving to: ‘tmp.tgz’ - - 0K .......... .......... .......... .......... .......... 63% 914K 0s - 50K .......... .......... ........ 100% 325M=0.05s - -2024-05-15 12:51:31 (1.40 MB/s) - ‘tmp.tgz’ saved [80562/80562] - -SMEFTsim_topU3l_MwScheme_UFO/ -SMEFTsim_topU3l_MwScheme_UFO/__init__.py -SMEFTsim_topU3l_MwScheme_UFO/param_card_massless.dat -SMEFTsim_topU3l_MwScheme_UFO/CT_couplings.py -SMEFTsim_topU3l_MwScheme_UFO/particles.py -SMEFTsim_topU3l_MwScheme_UFO/write_param_card.py -SMEFTsim_topU3l_MwScheme_UFO/decays.py -SMEFTsim_topU3l_MwScheme_UFO/parameters.py -SMEFTsim_topU3l_MwScheme_UFO/restrict_massless.dat -SMEFTsim_topU3l_MwScheme_UFO/object_library.py -SMEFTsim_topU3l_MwScheme_UFO/coupling_orders.py -SMEFTsim_topU3l_MwScheme_UFO/version.info -SMEFTsim_topU3l_MwScheme_UFO/function_library.py -SMEFTsim_topU3l_MwScheme_UFO/couplings.py -SMEFTsim_topU3l_MwScheme_UFO/propagators.py -SMEFTsim_topU3l_MwScheme_UFO/lorentz.py -SMEFTsim_topU3l_MwScheme_UFO/vertices.py -SMEFTsim_topU3l_MwScheme_UFO/restrict_SMlimit_massless.dat -fail to load model but auto_convert_model is on True. Trying to convert the model -convert model /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/models/SMEFTsim_topU3l_MwScheme_UFO -retry the load of the model -import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t INFO: load particles INFO: load vertices CRITICAL: Model with non QCD emission of gluon (found 14 of those). 
@@ -112,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.11452722549438477  +DEBUG: model prefixing takes 0.1475965976715088  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -121,16 +86,13 @@ Defined multiparticle l- = e- mu- Defined multiparticle vl = ve vm vt Defined multiparticle vl~ = ve~ vm~ vt~ Defined multiparticle all = g a ve vm vt ve~ vm~ vt~ u c t d s b t1 u~ c~ t~ d~ s~ b~ t1~ z w+ z1 w1+ h h1 w- w1- e- mu- ta- e+ mu+ ta+ -INFO: Change particles name to pass to MG5 convention -Kept definitions of multiparticles p / j / l+ / l- / vl / vl~ unchanged -Defined multiparticle all = g a ve vm vt ve~ vm~ vt~ u c t d s b t1 u~ c~ t~ d~ s~ b~ t1~ z w+ z1 w1+ h h1 w- w1- e- mu- ta- e+ mu+ ta+ generate g g > t t~ t t~ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.569 s +1 processes with 72 diagrams generated in 3.945 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -153,7 +115,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.183 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.198 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -161,7 +123,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.301 s +ALOHA: aloha creates 5 routines in 0.334 s VVV5 VVV5 FFV1 @@ -181,7 +143,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m5.839s -user 0m4.944s -sys 0m0.101s +real 0m5.419s +user 0m5.327s +sys 0m0.067s Code generation completed in 6 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 7675af32e5..6060e5336f 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.119 s +1 processes with 6 diagrams generated in 0.131 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -592,19 +592,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x -Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.108 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s +Wrote files for 16 helas calls in 0.118 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.172 s +ALOHA: aloha creates 3 routines in 0.197 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.167 s +ALOHA: aloha creates 6 routines in 0.193 s VVV1 VSS1 VSS1 @@ -645,8 +645,8 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.660s -user 0m2.390s +real 0m2.946s +user 0m2.668s sys 0m0.267s Code generation completed in 3 seconds ************************************************************ @@ -673,8 +673,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. 
Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -703,8 +703,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 2f5dfd6362..e4634819f9 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.120 s +1 processes with 6 diagrams generated in 0.132 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT @@ -583,7 +583,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.168 s +ALOHA: aloha creates 3 routines in 0.198 s VVV1 VSS1 VSS1 @@ -599,7 +599,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.256s -user 0m1.176s -sys 0m0.072s -Code generation completed in 2 seconds +real 0m1.420s +user 0m1.352s +sys 0m0.061s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 5e673473ee..7491cac3c5 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -51,8 +51,8 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. 
Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.116 s +1 processes with 3 diagrams generated in 0.126 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -593,16 +593,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.103 s +Wrote files for 10 helas calls in 0.110 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.128 s +ALOHA: aloha creates 2 routines in 0.145 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.124 s +ALOHA: aloha creates 4 routines in 0.141 s VVV1 FFV1 FFV1 @@ -638,9 +638,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.634s -user 0m2.369s -sys 0m0.243s +real 0m2.809s +user 0m2.561s +sys 0m0.237s Code generation completed in 3 seconds ************************************************************ * * @@ -666,8 +666,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards run quit INFO: @@ -696,8 +696,8 @@ INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index ba9e415fc7..8d75a175ae 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -51,17 +51,14 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F import model MSSM_SLHA2 -INFO: load particles -INFO: load vertices -DEBUG: model prefixing takes 0.8075113296508789  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -557,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.107 s +1 processes with 3 diagrams generated in 0.136 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -585,7 +582,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.129 s +ALOHA: aloha creates 2 routines in 0.145 s VVV1 FFV1 FFV1 @@ -600,7 +597,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m2.342s -user 0m2.232s -sys 0m0.076s -Code generation completed in 3 seconds +real 0m1.370s +user 0m1.281s +sys 0m0.073s +Code generation completed in 2 seconds From ea91ca65160b6bf0953bfbe525619ef2a8d92f72 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 22:36:54 +0200 Subject: [PATCH 29/48] [jtmk2] regenerate all processes after merging jtmk - strange, input.inc and Parameters.h changes for susy_gg_tt and susy_gg_t1t1 --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 14 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 8 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 16 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 8 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 20 +-- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 16 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 14 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 18 +-- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 16 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 +-- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 16 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 +-- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 12 +- .../CODEGEN_mad_heft_gg_bb_log.txt | 14 +- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 8 +- .../CODEGEN_mad_pp_tt012j_log.txt | 60 ++++---- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 18 +-- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 12 +- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 18 +-- .../susy_gg_t1t1.mad/Source/MODEL/input.inc | 142 +++++++++--------- .../src/Parameters_MSSM_SLHA2.h | 5 +- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 6 +- .../src/Parameters_MSSM_SLHA2.h | 5 +- .../CODEGEN_mad_susy_gg_tt_log.txt | 14 +- .../susy_gg_tt.mad/Source/MODEL/input.inc | 142 +++++++++--------- .../src/Parameters_MSSM_SLHA2.h | 5 +- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 12 +- .../susy_gg_tt.sa/src/Parameters_MSSM_SLHA2.h | 5 +- 28 files changed, 332 insertions(+), 332 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt 
b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 4f57b9ac54..f49e898735 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058667659759521484  +DEBUG: model prefixing takes 0.005671262741088867  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,7 +177,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -199,14 +199,14 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.218 s +ALOHA: aloha creates 3 routines in 0.209 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.269 s +ALOHA: aloha creates 7 routines in 0.266 s FFV1 FFV1 FFV2 @@ -250,9 +250,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.957s -user 0m1.719s -sys 0m0.230s +real 0m1.937s +user 0m1.729s +sys 0m0.202s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 69249db5f4..63dc9fe355 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005859851837158203  +DEBUG: model prefixing takes 0.005807399749755859  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -184,7 +184,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.281 s +ALOHA: aloha creates 4 routines in 0.283 s FFV1 FFV1 FFV2 @@ -203,7 +203,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
quit -real 0m0.756s -user 0m0.632s +real 0m0.706s +user 0m0.637s sys 0m0.065s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 6210200f50..2bef77ca41 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005722522735595703  +DEBUG: model prefixing takes 0.005599498748779297  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,8 +193,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s -Wrote files for 10 helas calls in 0.109 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s +Wrote files for 10 helas calls in 0.106 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines @@ -203,7 +203,7 @@ ALOHA: aloha creates 2 routines in 0.154 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.139 s +ALOHA: aloha creates 4 routines in 0.138 s VVV1 FFV1 FFV1 @@ -239,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.779s -user 0m1.579s -sys 0m0.196s +real 0m1.767s +user 0m1.542s +sys 0m0.218s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index c8f652402b..0e530e2a27 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057947635650634766  +DEBUG: model prefixing takes 0.0057544708251953125  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -199,6 +199,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s quit real 0m0.555s -user 0m0.490s -sys 0m0.062s -Code generation completed in 1 seconds +user 0m0.507s +sys 0m0.045s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 20014f621c..213d94caf7 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005769014358520508  +DEBUG: model prefixing takes 0.005837678909301758  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -188,7 +188,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -205,7 +205,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -220,15 +220,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.045 s -Wrote files for 46 helas calls in 0.257 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.046 s +Wrote files for 46 helas calls in 0.262 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.345 s +ALOHA: aloha creates 5 routines in 0.352 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -236,7 +236,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.328 s +ALOHA: aloha creates 10 routines in 0.336 s VVV1 VVV1 FFV1 @@ -285,9 +285,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.399s -user 0m2.169s -sys 0m0.231s +real 0m2.470s +user 0m2.209s +sys 0m0.245s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index fb93c2a74e..7ddb1403fe 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058596134185791016  +DEBUG: model prefixing takes 0.005815982818603516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s -Wrote files for 36 helas calls in 0.158 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Wrote files for 36 helas calls in 0.156 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.342 s +ALOHA: aloha creates 5 routines in 0.345 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -254,9 +254,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.268s -user 0m2.048s -sys 0m0.221s +real 0m2.267s +user 0m2.045s +sys 0m0.224s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 0b2e5c2a4c..0bba3094b0 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005775928497314453  +DEBUG: model prefixing takes 0.005731821060180664  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.343 s +ALOHA: aloha creates 5 routines in 0.340 s VVV1 VVV1 FFV1 @@ -206,7 +206,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m0.818s -user 0m0.763s -sys 0m0.051s -Code generation completed in 0 seconds +real 0m0.811s +user 0m0.755s +sys 0m0.052s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index b0eb4bc117..615cb5fda7 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005640506744384766  +DEBUG: model prefixing takes 0.005845069885253906  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.166 s +1 processes with 123 diagrams generated in 0.169 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.449 s -Wrote files for 222 helas calls in 0.729 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.453 s +Wrote files for 222 helas calls in 0.744 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.349 s +ALOHA: aloha creates 5 routines in 0.357 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -257,9 +257,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.426s -user 0m3.165s -sys 0m0.251s +real 0m3.464s +user 0m3.220s +sys 0m0.237s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 01e2e1e58f..994a65898e 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005846738815307617  +DEBUG: model prefixing takes 0.005870342254638672  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.165 s +1 processes with 123 diagrams generated in 0.167 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.447 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.454 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.332 s +ALOHA: aloha creates 5 routines in 0.346 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.503s -user 0m1.439s -sys 0m0.058s -Code generation completed in 2 seconds +real 0m1.541s +user 0m1.470s +sys 0m0.063s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index d01addebe9..83bfdf174d 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058705806732177734  +DEBUG: model prefixing takes 0.005798816680908203  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.979 s +1 processes with 1240 diagrams generated in 1.969 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -180,7 +180,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,15 +195,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.941 s -Wrote files for 2281 helas calls in 19.565 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.936 s +Wrote files for 2281 helas calls in 19.465 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.335 s +ALOHA: aloha creates 5 routines in 0.334 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -211,7 +211,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.330 s +ALOHA: aloha creates 10 routines in 0.328 s VVV1 VVV1 FFV1 @@ -259,9 +259,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m30.825s -user 0m30.307s -sys 0m0.411s +real 0m30.712s +user 0m30.183s +sys 0m0.428s Code generation completed in 31 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index f69ae47bf3..c96b19803e 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005766630172729492  +DEBUG: model prefixing takes 0.005761861801147461  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.995 s +1 processes with 1240 diagrams generated in 1.975 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.886 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.951 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.368 s +ALOHA: aloha creates 5 routines in 0.366 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. quit -real 0m13.671s -user 0m13.504s -sys 0m0.117s -Code generation completed in 13 seconds +real 0m13.758s +user 0m13.574s +sys 0m0.124s +Code generation completed in 14 seconds diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index b5ebeb4b75..4bd29bfc4f 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058400630950927734  +DEBUG: model prefixing takes 0.005829811096191406  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.084 s +8 processes with 40 diagrams generated in 0.082 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -201,7 +201,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -218,7 +218,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -234,16 +234,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.033 s -Wrote files for 32 helas calls in 0.234 s +Wrote files for 32 helas calls in 0.231 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.155 s +ALOHA: aloha creates 2 routines in 0.152 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.141 s +ALOHA: aloha creates 4 routines in 0.138 s FFV1 FFV1 FFV1 @@ -296,9 +296,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.038s -user 0m1.818s -sys 0m0.212s +real 0m2.004s +user 0m1.777s +sys 0m0.228s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 71494815bb..f85f286cbd 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005812883377075195  +DEBUG: model prefixing takes 0.005831241607666016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.084 s +8 processes with 40 diagrams generated in 0.081 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -215,7 +215,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.153 s +ALOHA: aloha creates 2 routines in 0.151 s FFV1 FFV1 FFV1 @@ -231,7 +231,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m0.686s -user 0m0.624s -sys 0m0.058s +real 0m0.673s +user 0m0.617s +sys 0m0.052s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 8cee1a79cc..4d98c8255b 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -150,7 +150,7 @@ INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Creating files in directory P1_gg_bbx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -166,20 +166,20 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Finding symmetric diagrams for subprocess group gg_bbx Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s -Wrote files for 12 helas calls in 0.111 s +Wrote files for 12 helas calls in 0.110 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.276 s +ALOHA: aloha creates 4 routines in 0.277 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.261 s +ALOHA: aloha creates 8 routines in 0.262 s VVS3 VVV1 FFV1 @@ -217,9 +217,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.011s -user 0m1.802s -sys 0m0.211s +real 0m2.013s +user 0m1.774s +sys 0m0.241s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 69fbe369c1..6201e51594 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -174,7 +174,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
quit -real 0m0.670s -user 0m0.604s -sys 0m0.062s -Code generation completed in 0 seconds +real 0m0.665s +user 0m0.608s +sys 0m0.053s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index b0fc10707c..e3fed5eb5f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005956411361694336  +DEBUG: model prefixing takes 0.005815029144287109  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.032 s +5 processes with 7 diagrams generated in 0.031 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.147 s +13 processes with 76 diagrams generated in 0.143 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. 
@@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.975 s +65 processes with 1119 diagrams generated in 1.940 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -500,7 +500,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -517,7 +517,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -534,7 +534,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -551,7 +551,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -568,7 +568,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -585,7 +585,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -602,7 +602,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -619,7 +619,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -636,7 +636,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -653,7 +653,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -670,7 +670,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -687,7 +687,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -704,7 +704,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -721,7 +721,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -738,7 +738,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -755,7 +755,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -772,7 +772,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -789,7 +789,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -804,15 +804,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.374 s -Wrote files for 810 helas calls in 3.456 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.359 s +Wrote files for 810 helas calls in 3.440 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.355 s +ALOHA: aloha creates 5 routines in 0.356 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -820,7 +820,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.327 s +ALOHA: aloha creates 10 routines in 0.331 s VVV1 VVV1 FFV1 @@ -1030,10 +1030,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.344s -user 0m8.848s -sys 0m0.461s -Code generation completed in 9 seconds +real 0m10.453s +user 0m8.760s +sys 0m0.481s +Code generation completed in 10 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 263e86554b..a8fd34b0cf 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.1473391056060791  +DEBUG: model prefixing takes 0.14514756202697754  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.918 s +1 processes with 72 diagrams generated in 3.882 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +115,7 @@ INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 INFO: Creating files in directory P1_gg_ttxttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -130,7 +130,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx -Generated helas calls for 1 subprocesses (72 diagrams) in 0.196 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.198 s Wrote files for 119 helas calls in 0.436 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -138,7 +138,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.340 s +ALOHA: aloha creates 5 routines in 0.335 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -146,7 +146,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.353 s +ALOHA: aloha creates 10 routines in 0.346 s VVV5 VVV5 FFV1 @@ -191,9 +191,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.185s -user 0m6.891s -sys 0m0.275s +real 0m7.130s +user 0m6.861s +sys 0m0.249s Code generation completed in 7 seconds ************************************************************ * * diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 28afb306f5..f5a4be27a2 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14701032638549805  +DEBUG: model prefixing takes 0.14623308181762695  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.917 s +1 processes with 72 diagrams generated in 3.900 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -123,7 +123,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.338 s +ALOHA: aloha creates 5 routines in 0.333 s VVV5 VVV5 FFV1 @@ -143,7 +143,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
quit -real 0m5.392s -user 0m5.280s -sys 0m0.075s +real 0m5.362s +user 0m5.249s +sys 0m0.090s Code generation completed in 5 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index ecf78249d6..a1beb5bbec 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.130 s +1 processes with 6 diagrams generated in 0.129 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -593,18 +593,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.116 s +Wrote files for 16 helas calls in 0.115 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.195 s +ALOHA: aloha creates 3 routines in 0.192 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.192 s +ALOHA: aloha creates 6 routines in 0.189 s VVV1 VSS1 VSS1 @@ -645,10 +645,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.894s -user 0m2.641s -sys 0m0.253s -Code generation completed in 3 seconds +real 0m4.265s +user 0m2.653s +sys 0m0.247s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/input.inc b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/input.inc index 59b3e6e7f2..286a92e533 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/input.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/input.inc @@ -2,30 +2,29 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - DOUBLE PRECISION MDL_SQRT__AS,MDL_G__EXP__2,MDL_MSD5,MDL_MSD2 - $ ,MDL_MSU5,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2 - $ ,MDL_RMQ22X2,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2 - $ ,MDL_CONJG__RN3X3,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1 - $ ,MDL_RN3X3,MDL_RN1X1,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1 - $ ,MDL_I98X11,MDL_I97X11,MDL_I96X11,MDL_I93X11,MDL_I92X11 - $ ,MDL_I87X11,MDL_I82X11,MDL_I74X11,MDL_I6X44,MDL_I5X11 - $ ,MDL_I53X11,MDL_I52X44,MDL_I51X11,MDL_I39X11,MDL_I31X11 - $ ,MDL_I26X44,MDL_I25X11,MDL_I12X11,MDL_I102X44,MDL_I101X44 - $ ,MDL_I100X44,MDL_CKM3X3,MDL_ATAN__TB,MDL_BETA,MDL_CW - $ ,MDL_MZ__EXP__2,MDL_CW__EXP__2,MDL_SW,MDL_COS__BETA - $ ,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2,MDL_COS__ALP - $ ,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD,MDL_VU - $ ,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3,MDL_RRD3X6 - $ ,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP,MDL_RMUH - $ ,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1,MDL_RME23X3 - $ ,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3,MDL_MHD2 - $ ,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1,MDL_RMU23X3 - $ ,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4,MDL_RNN2X1 - $ ,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1,MDL_RNN3X2 - $ 
,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2,MDL_RNN4X3 - $ ,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3,MDL_RRL3X6 - $ ,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1,AS - $ ,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 + DOUBLE PRECISION MDL_SQRT__AS,MDL_MSD5,MDL_MSD2,MDL_MSU5 + $ ,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2,MDL_RMQ22X2 + $ ,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2,MDL_CONJG__RN3X3 + $ ,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1,MDL_RN3X3,MDL_RN1X1 + $ ,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1,MDL_I98X11,MDL_I97X11 + $ ,MDL_I96X11,MDL_I93X11,MDL_I92X11,MDL_I87X11,MDL_I82X11 + $ ,MDL_I74X11,MDL_I6X44,MDL_I5X11,MDL_I53X11,MDL_I52X44 + $ ,MDL_I51X11,MDL_I39X11,MDL_I31X11,MDL_I26X44,MDL_I25X11 + $ ,MDL_I12X11,MDL_I102X44,MDL_I101X44,MDL_I100X44,MDL_CKM3X3 + $ ,MDL_ATAN__TB,MDL_BETA,MDL_CW,MDL_MZ__EXP__2,MDL_CW__EXP__2 + $ ,MDL_SW,MDL_COS__BETA,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2 + $ ,MDL_COS__ALP,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD + $ ,MDL_VU,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3 + $ ,MDL_RRD3X6,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP + $ ,MDL_RMUH,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1 + $ ,MDL_RME23X3,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3 + $ ,MDL_MHD2,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1 + $ ,MDL_RMU23X3,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4 + $ ,MDL_RNN2X1,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1 + $ ,MDL_RNN3X2,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2 + $ ,MDL_RNN4X3,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3 + $ ,MDL_RRL3X6,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1 + $ ,AS,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 $ ,MDL_RTU3X3,MDL_RUU1X1,MDL_RUU1X2,MDL_RUU2X1,MDL_RUU2X2 $ ,MDL_RMNS1X1,MDL_RMNS2X2,MDL_RMNS3X3,MDL_RRU1X1,MDL_RRU2X2 $ ,MDL_RRU3X3,MDL_RRU3X6,MDL_RRU4X4,MDL_RRU5X5,MDL_RRU6X3 @@ -33,30 +32,29 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ 
,MDL_RVV1X2,MDL_RVV2X1,MDL_RVV2X2,MDL_RYD3X3,MDL_RYE3X3 $ ,MDL_RYU3X3 - COMMON/PARAMS_R/ MDL_SQRT__AS,MDL_G__EXP__2,MDL_MSD5,MDL_MSD2 - $ ,MDL_MSU5,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2 - $ ,MDL_RMQ22X2,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2 - $ ,MDL_CONJG__RN3X3,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1 - $ ,MDL_RN3X3,MDL_RN1X1,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1 - $ ,MDL_I98X11,MDL_I97X11,MDL_I96X11,MDL_I93X11,MDL_I92X11 - $ ,MDL_I87X11,MDL_I82X11,MDL_I74X11,MDL_I6X44,MDL_I5X11 - $ ,MDL_I53X11,MDL_I52X44,MDL_I51X11,MDL_I39X11,MDL_I31X11 - $ ,MDL_I26X44,MDL_I25X11,MDL_I12X11,MDL_I102X44,MDL_I101X44 - $ ,MDL_I100X44,MDL_CKM3X3,MDL_ATAN__TB,MDL_BETA,MDL_CW - $ ,MDL_MZ__EXP__2,MDL_CW__EXP__2,MDL_SW,MDL_COS__BETA - $ ,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2,MDL_COS__ALP - $ ,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD,MDL_VU - $ ,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3,MDL_RRD3X6 - $ ,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP,MDL_RMUH - $ ,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1,MDL_RME23X3 - $ ,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3,MDL_MHD2 - $ ,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1,MDL_RMU23X3 - $ ,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4,MDL_RNN2X1 - $ ,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1,MDL_RNN3X2 - $ ,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2,MDL_RNN4X3 - $ ,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3,MDL_RRL3X6 - $ ,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1,AS - $ ,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 + COMMON/PARAMS_R/ MDL_SQRT__AS,MDL_MSD5,MDL_MSD2,MDL_MSU5 + $ ,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2,MDL_RMQ22X2 + $ ,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2,MDL_CONJG__RN3X3 + $ ,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1,MDL_RN3X3,MDL_RN1X1 + $ ,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1,MDL_I98X11,MDL_I97X11 + $ ,MDL_I96X11,MDL_I93X11,MDL_I92X11,MDL_I87X11,MDL_I82X11 + $ ,MDL_I74X11,MDL_I6X44,MDL_I5X11,MDL_I53X11,MDL_I52X44 + $ 
,MDL_I51X11,MDL_I39X11,MDL_I31X11,MDL_I26X44,MDL_I25X11 + $ ,MDL_I12X11,MDL_I102X44,MDL_I101X44,MDL_I100X44,MDL_CKM3X3 + $ ,MDL_ATAN__TB,MDL_BETA,MDL_CW,MDL_MZ__EXP__2,MDL_CW__EXP__2 + $ ,MDL_SW,MDL_COS__BETA,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2 + $ ,MDL_COS__ALP,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD + $ ,MDL_VU,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3 + $ ,MDL_RRD3X6,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP + $ ,MDL_RMUH,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1 + $ ,MDL_RME23X3,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3 + $ ,MDL_MHD2,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1 + $ ,MDL_RMU23X3,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4 + $ ,MDL_RNN2X1,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1 + $ ,MDL_RNN3X2,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2 + $ ,MDL_RNN4X3,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3 + $ ,MDL_RRL3X6,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1 + $ ,AS,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 $ ,MDL_RTU3X3,MDL_RUU1X1,MDL_RUU1X2,MDL_RUU2X1,MDL_RUU2X2 $ ,MDL_RMNS1X1,MDL_RMNS2X2,MDL_RMNS3X3,MDL_RRU1X1,MDL_RRU2X2 $ ,MDL_RRU3X3,MDL_RRU3X6,MDL_RRU4X4,MDL_RRU5X5,MDL_RRU6X3 @@ -65,18 +63,18 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ ,MDL_RYU3X3 - DOUBLE COMPLEX MDL_MD21X1,MDL_MD22X2,MDL_MD23X3,MDL_ME21X1 - $ ,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2,MDL_ML23X3 - $ ,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1,MDL_MU22X2 - $ ,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3,MDL_NN1X1,MDL_NN1X2 - $ ,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2,MDL_NN2X3,MDL_NN2X4 - $ ,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4,MDL_NN4X1,MDL_NN4X2 - $ ,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6,MDL_RD6X3,MDL_RD6X6 - $ ,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6,MDL_RU3X3,MDL_RU3X6 - $ ,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2,MDL_UU2X1,MDL_UU2X2 - $ ,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2,MDL_TD3X3,MDL_TE3X3 - $ ,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3,MDL_BB - $ 
,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 + DOUBLE COMPLEX MDL_G__EXP__2,MDL_MD21X1,MDL_MD22X2,MDL_MD23X3 + $ ,MDL_ME21X1,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2 + $ ,MDL_ML23X3,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1 + $ ,MDL_MU22X2,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3 + $ ,MDL_NN1X1,MDL_NN1X2,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2 + $ ,MDL_NN2X3,MDL_NN2X4,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4 + $ ,MDL_NN4X1,MDL_NN4X2,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6 + $ ,MDL_RD6X3,MDL_RD6X6,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6 + $ ,MDL_RU3X3,MDL_RU3X6,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2 + $ ,MDL_UU2X1,MDL_UU2X2,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2 + $ ,MDL_TD3X3,MDL_TE3X3,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3 + $ ,MDL_BB,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 $ ,MDL_I10X36,MDL_CONJG__RD3X6,MDL_I100X33,MDL_I100X36 $ ,MDL_CONJG__RD6X6,MDL_I100X63,MDL_I100X66,MDL_CONJG__RL3X6 $ ,MDL_I101X33,MDL_I101X36,MDL_CONJG__RL6X6,MDL_I101X63 @@ -150,18 +148,18 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ ,MDL_CONJG__UU2X2,MDL_CONJG__VV1X1,MDL_CONJG__VV1X2 $ ,MDL_CONJG__VV2X1,MDL_CONJG__VV2X2,MDL_CONJG__MUH - COMMON/PARAMS_C/ MDL_MD21X1,MDL_MD22X2,MDL_MD23X3,MDL_ME21X1 - $ ,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2,MDL_ML23X3 - $ ,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1,MDL_MU22X2 - $ ,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3,MDL_NN1X1,MDL_NN1X2 - $ ,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2,MDL_NN2X3,MDL_NN2X4 - $ ,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4,MDL_NN4X1,MDL_NN4X2 - $ ,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6,MDL_RD6X3,MDL_RD6X6 - $ ,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6,MDL_RU3X3,MDL_RU3X6 - $ ,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2,MDL_UU2X1,MDL_UU2X2 - $ ,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2,MDL_TD3X3,MDL_TE3X3 - $ ,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3,MDL_BB - $ ,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 + COMMON/PARAMS_C/ 
MDL_G__EXP__2,MDL_MD21X1,MDL_MD22X2,MDL_MD23X3 + $ ,MDL_ME21X1,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2 + $ ,MDL_ML23X3,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1 + $ ,MDL_MU22X2,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3 + $ ,MDL_NN1X1,MDL_NN1X2,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2 + $ ,MDL_NN2X3,MDL_NN2X4,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4 + $ ,MDL_NN4X1,MDL_NN4X2,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6 + $ ,MDL_RD6X3,MDL_RD6X6,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6 + $ ,MDL_RU3X3,MDL_RU3X6,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2 + $ ,MDL_UU2X1,MDL_UU2X2,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2 + $ ,MDL_TD3X3,MDL_TE3X3,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3 + $ ,MDL_BB,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 $ ,MDL_I10X36,MDL_CONJG__RD3X6,MDL_I100X33,MDL_I100X36 $ ,MDL_CONJG__RD6X6,MDL_I100X63,MDL_I100X66,MDL_CONJG__RL3X6 $ ,MDL_I101X33,MDL_I101X36,MDL_CONJG__RL6X6,MDL_I101X63 diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/Parameters_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.mad/src/Parameters_MSSM_SLHA2.h index cad28e6b75..d37bb0bd26 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/src/Parameters_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/Parameters_MSSM_SLHA2.h @@ -58,7 +58,8 @@ namespace mg5amcCpu // (none) // Model parameters dependent on aS - //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) + //double mdl_sqrt__aS, G; // now computed event-by-event (running alphas #373) + //cxsmpl mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //cxsmpl GC_6, GC_55, GC_57, GC_90; // now computed event-by-event (running alphas #373) @@ -749,7 +750,7 @@ namespace mg5amcCpu // Model parameters dependent on aS //constexpr double mdl_sqrt__aS = constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373) //constexpr double G = 2. 
* mdl_sqrt__aS * constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373) - //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) + //constexpr cxsmpl mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //constexpr cxsmpl GC_6 = -G; // now computed event-by-event (running alphas #373) diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index dbbb4dc1d8..12d38d134f 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -599,7 +599,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
quit -real 0m1.404s -user 0m1.322s -sys 0m0.076s +real 0m1.397s +user 0m1.319s +sys 0m0.072s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/Parameters_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.sa/src/Parameters_MSSM_SLHA2.h index cad28e6b75..d37bb0bd26 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/src/Parameters_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/Parameters_MSSM_SLHA2.h @@ -58,7 +58,8 @@ namespace mg5amcCpu // (none) // Model parameters dependent on aS - //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) + //double mdl_sqrt__aS, G; // now computed event-by-event (running alphas #373) + //cxsmpl mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //cxsmpl GC_6, GC_55, GC_57, GC_90; // now computed event-by-event (running alphas #373) @@ -749,7 +750,7 @@ namespace mg5amcCpu // Model parameters dependent on aS //constexpr double mdl_sqrt__aS = constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373) //constexpr double G = 2. * mdl_sqrt__aS * constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373) - //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) + //constexpr cxsmpl mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //constexpr cxsmpl GC_6 = -G; // now computed event-by-event (running alphas #373) diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index a815e2ae7a..92da114f35 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.123 s +1 processes with 3 diagrams generated in 0.124 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -597,12 +597,12 @@ Wrote files for 10 helas calls in 0.108 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.144 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.139 s +ALOHA: aloha creates 4 routines in 0.141 s VVV1 FFV1 FFV1 @@ -638,9 +638,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.770s -user 0m2.520s -sys 0m0.249s +real 0m2.762s +user 0m2.516s +sys 0m0.245s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/input.inc b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/input.inc index 59b3e6e7f2..286a92e533 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/input.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/input.inc @@ -2,30 +2,29 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - DOUBLE PRECISION MDL_SQRT__AS,MDL_G__EXP__2,MDL_MSD5,MDL_MSD2 - $ ,MDL_MSU5,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2 - $ ,MDL_RMQ22X2,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2 - $ ,MDL_CONJG__RN3X3,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1 - $ ,MDL_RN3X3,MDL_RN1X1,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1 - $ ,MDL_I98X11,MDL_I97X11,MDL_I96X11,MDL_I93X11,MDL_I92X11 - $ ,MDL_I87X11,MDL_I82X11,MDL_I74X11,MDL_I6X44,MDL_I5X11 - $ ,MDL_I53X11,MDL_I52X44,MDL_I51X11,MDL_I39X11,MDL_I31X11 - $ ,MDL_I26X44,MDL_I25X11,MDL_I12X11,MDL_I102X44,MDL_I101X44 - $ ,MDL_I100X44,MDL_CKM3X3,MDL_ATAN__TB,MDL_BETA,MDL_CW - $ ,MDL_MZ__EXP__2,MDL_CW__EXP__2,MDL_SW,MDL_COS__BETA - $ ,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2,MDL_COS__ALP - $ ,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD,MDL_VU - $ ,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3,MDL_RRD3X6 - $ ,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP,MDL_RMUH - $ ,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1,MDL_RME23X3 - $ ,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3,MDL_MHD2 - $ ,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1,MDL_RMU23X3 - $ ,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4,MDL_RNN2X1 - $ ,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1,MDL_RNN3X2 - $ ,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2,MDL_RNN4X3 - $ 
,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3,MDL_RRL3X6 - $ ,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1,AS - $ ,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 + DOUBLE PRECISION MDL_SQRT__AS,MDL_MSD5,MDL_MSD2,MDL_MSU5 + $ ,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2,MDL_RMQ22X2 + $ ,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2,MDL_CONJG__RN3X3 + $ ,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1,MDL_RN3X3,MDL_RN1X1 + $ ,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1,MDL_I98X11,MDL_I97X11 + $ ,MDL_I96X11,MDL_I93X11,MDL_I92X11,MDL_I87X11,MDL_I82X11 + $ ,MDL_I74X11,MDL_I6X44,MDL_I5X11,MDL_I53X11,MDL_I52X44 + $ ,MDL_I51X11,MDL_I39X11,MDL_I31X11,MDL_I26X44,MDL_I25X11 + $ ,MDL_I12X11,MDL_I102X44,MDL_I101X44,MDL_I100X44,MDL_CKM3X3 + $ ,MDL_ATAN__TB,MDL_BETA,MDL_CW,MDL_MZ__EXP__2,MDL_CW__EXP__2 + $ ,MDL_SW,MDL_COS__BETA,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2 + $ ,MDL_COS__ALP,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD + $ ,MDL_VU,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3 + $ ,MDL_RRD3X6,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP + $ ,MDL_RMUH,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1 + $ ,MDL_RME23X3,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3 + $ ,MDL_MHD2,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1 + $ ,MDL_RMU23X3,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4 + $ ,MDL_RNN2X1,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1 + $ ,MDL_RNN3X2,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2 + $ ,MDL_RNN4X3,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3 + $ ,MDL_RRL3X6,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1 + $ ,AS,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 $ ,MDL_RTU3X3,MDL_RUU1X1,MDL_RUU1X2,MDL_RUU2X1,MDL_RUU2X2 $ ,MDL_RMNS1X1,MDL_RMNS2X2,MDL_RMNS3X3,MDL_RRU1X1,MDL_RRU2X2 $ ,MDL_RRU3X3,MDL_RRU3X6,MDL_RRU4X4,MDL_RRU5X5,MDL_RRU6X3 @@ -33,30 +32,29 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ ,MDL_RVV1X2,MDL_RVV2X1,MDL_RVV2X2,MDL_RYD3X3,MDL_RYE3X3 $ ,MDL_RYU3X3 - COMMON/PARAMS_R/ 
MDL_SQRT__AS,MDL_G__EXP__2,MDL_MSD5,MDL_MSD2 - $ ,MDL_MSU5,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2 - $ ,MDL_RMQ22X2,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2 - $ ,MDL_CONJG__RN3X3,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1 - $ ,MDL_RN3X3,MDL_RN1X1,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1 - $ ,MDL_I98X11,MDL_I97X11,MDL_I96X11,MDL_I93X11,MDL_I92X11 - $ ,MDL_I87X11,MDL_I82X11,MDL_I74X11,MDL_I6X44,MDL_I5X11 - $ ,MDL_I53X11,MDL_I52X44,MDL_I51X11,MDL_I39X11,MDL_I31X11 - $ ,MDL_I26X44,MDL_I25X11,MDL_I12X11,MDL_I102X44,MDL_I101X44 - $ ,MDL_I100X44,MDL_CKM3X3,MDL_ATAN__TB,MDL_BETA,MDL_CW - $ ,MDL_MZ__EXP__2,MDL_CW__EXP__2,MDL_SW,MDL_COS__BETA - $ ,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2,MDL_COS__ALP - $ ,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD,MDL_VU - $ ,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3,MDL_RRD3X6 - $ ,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP,MDL_RMUH - $ ,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1,MDL_RME23X3 - $ ,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3,MDL_MHD2 - $ ,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1,MDL_RMU23X3 - $ ,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4,MDL_RNN2X1 - $ ,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1,MDL_RNN3X2 - $ ,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2,MDL_RNN4X3 - $ ,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3,MDL_RRL3X6 - $ ,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1,AS - $ ,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 + COMMON/PARAMS_R/ MDL_SQRT__AS,MDL_MSD5,MDL_MSD2,MDL_MSU5 + $ ,MDL_MSU2,MDL_MSL5,MDL_MSL2,MDL_MSN2,MDL_RMU22X2,MDL_RMQ22X2 + $ ,MDL_RML22X2,MDL_RME22X2,MDL_RMD22X2,MDL_CONJG__RN3X3 + $ ,MDL_CONJG__CKM3X3,MDL_RU4X4,MDL_RU1X1,MDL_RN3X3,MDL_RN1X1 + $ ,MDL_RL4X4,MDL_RL1X1,MDL_RD4X4,MDL_RD1X1,MDL_I98X11,MDL_I97X11 + $ ,MDL_I96X11,MDL_I93X11,MDL_I92X11,MDL_I87X11,MDL_I82X11 + $ ,MDL_I74X11,MDL_I6X44,MDL_I5X11,MDL_I53X11,MDL_I52X44 + $ ,MDL_I51X11,MDL_I39X11,MDL_I31X11,MDL_I26X44,MDL_I25X11 + $ ,MDL_I12X11,MDL_I102X44,MDL_I101X44,MDL_I100X44,MDL_CKM3X3 + $ 
,MDL_ATAN__TB,MDL_BETA,MDL_CW,MDL_MZ__EXP__2,MDL_CW__EXP__2 + $ ,MDL_SW,MDL_COS__BETA,MDL_SIN__BETA,MDL_SQRT__2,MDL_SW__EXP__2 + $ ,MDL_COS__ALP,MDL_SIN__ALP,MDL_EE,MDL_GP,MDL_GW,MDL_VEV,MDL_VD + $ ,MDL_VU,MDL_EE__EXP__2,MDL_RRD1X1,MDL_RRD2X2,MDL_RRD3X3 + $ ,MDL_RRD3X6,MDL_RRD4X4,MDL_RRD5X5,MDL_RRD6X3,MDL_RRD6X6,MDL_ALP + $ ,MDL_RMUH,MDL_TB,MDL_MA2,MDL_RMD21X1,MDL_RMD23X3,MDL_RME21X1 + $ ,MDL_RME23X3,MDL_RML21X1,MDL_RML23X3,MDL_RMX1,MDL_RMX2,MDL_RMX3 + $ ,MDL_MHD2,MDL_MHU2,MDL_RMQ21X1,MDL_RMQ23X3,MDL_RMU21X1 + $ ,MDL_RMU23X3,MDL_RNN1X1,MDL_RNN1X2,MDL_RNN1X3,MDL_RNN1X4 + $ ,MDL_RNN2X1,MDL_RNN2X2,MDL_RNN2X3,MDL_RNN2X4,MDL_RNN3X1 + $ ,MDL_RNN3X2,MDL_RNN3X3,MDL_RNN3X4,MDL_RNN4X1,MDL_RNN4X2 + $ ,MDL_RNN4X3,MDL_RNN4X4,MDL_RRL1X1,MDL_RRL2X2,MDL_RRL3X3 + $ ,MDL_RRL3X6,MDL_RRL4X4,MDL_RRL5X5,MDL_RRL6X3,MDL_RRL6X6,AEWM1 + $ ,AS,MDL_RRN1X1,MDL_RRN2X2,MDL_RRN3X3,MDL_RTD3X3,MDL_RTE3X3 $ ,MDL_RTU3X3,MDL_RUU1X1,MDL_RUU1X2,MDL_RUU2X1,MDL_RUU2X2 $ ,MDL_RMNS1X1,MDL_RMNS2X2,MDL_RMNS3X3,MDL_RRU1X1,MDL_RRU2X2 $ ,MDL_RRU3X3,MDL_RRU3X6,MDL_RRU4X4,MDL_RRU5X5,MDL_RRU6X3 @@ -65,18 +63,18 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ ,MDL_RYU3X3 - DOUBLE COMPLEX MDL_MD21X1,MDL_MD22X2,MDL_MD23X3,MDL_ME21X1 - $ ,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2,MDL_ML23X3 - $ ,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1,MDL_MU22X2 - $ ,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3,MDL_NN1X1,MDL_NN1X2 - $ ,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2,MDL_NN2X3,MDL_NN2X4 - $ ,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4,MDL_NN4X1,MDL_NN4X2 - $ ,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6,MDL_RD6X3,MDL_RD6X6 - $ ,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6,MDL_RU3X3,MDL_RU3X6 - $ ,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2,MDL_UU2X1,MDL_UU2X2 - $ ,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2,MDL_TD3X3,MDL_TE3X3 - $ ,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3,MDL_BB - $ ,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 + DOUBLE COMPLEX MDL_G__EXP__2,MDL_MD21X1,MDL_MD22X2,MDL_MD23X3 + $ 
,MDL_ME21X1,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2 + $ ,MDL_ML23X3,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1 + $ ,MDL_MU22X2,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3 + $ ,MDL_NN1X1,MDL_NN1X2,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2 + $ ,MDL_NN2X3,MDL_NN2X4,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4 + $ ,MDL_NN4X1,MDL_NN4X2,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6 + $ ,MDL_RD6X3,MDL_RD6X6,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6 + $ ,MDL_RU3X3,MDL_RU3X6,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2 + $ ,MDL_UU2X1,MDL_UU2X2,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2 + $ ,MDL_TD3X3,MDL_TE3X3,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3 + $ ,MDL_BB,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 $ ,MDL_I10X36,MDL_CONJG__RD3X6,MDL_I100X33,MDL_I100X36 $ ,MDL_CONJG__RD6X6,MDL_I100X63,MDL_I100X66,MDL_CONJG__RL3X6 $ ,MDL_I101X33,MDL_I101X36,MDL_CONJG__RL6X6,MDL_I101X63 @@ -150,18 +148,18 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc $ ,MDL_CONJG__UU2X2,MDL_CONJG__VV1X1,MDL_CONJG__VV1X2 $ ,MDL_CONJG__VV2X1,MDL_CONJG__VV2X2,MDL_CONJG__MUH - COMMON/PARAMS_C/ MDL_MD21X1,MDL_MD22X2,MDL_MD23X3,MDL_ME21X1 - $ ,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2,MDL_ML23X3 - $ ,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1,MDL_MU22X2 - $ ,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3,MDL_NN1X1,MDL_NN1X2 - $ ,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2,MDL_NN2X3,MDL_NN2X4 - $ ,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4,MDL_NN4X1,MDL_NN4X2 - $ ,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6,MDL_RD6X3,MDL_RD6X6 - $ ,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6,MDL_RU3X3,MDL_RU3X6 - $ ,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2,MDL_UU2X1,MDL_UU2X2 - $ ,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2,MDL_TD3X3,MDL_TE3X3 - $ ,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3,MDL_BB - $ ,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 + COMMON/PARAMS_C/ MDL_G__EXP__2,MDL_MD21X1,MDL_MD22X2,MDL_MD23X3 + $ ,MDL_ME21X1,MDL_ME22X2,MDL_ME23X3,MDL_ML21X1,MDL_ML22X2 + $ 
,MDL_ML23X3,MDL_MQ21X1,MDL_MQ22X2,MDL_MQ23X3,MDL_MU21X1 + $ ,MDL_MU22X2,MDL_MU23X3,MDL_MUH,MDL_MX1,MDL_MX2,MDL_MX3 + $ ,MDL_NN1X1,MDL_NN1X2,MDL_NN1X3,MDL_NN1X4,MDL_NN2X1,MDL_NN2X2 + $ ,MDL_NN2X3,MDL_NN2X4,MDL_NN3X1,MDL_NN3X2,MDL_NN3X3,MDL_NN3X4 + $ ,MDL_NN4X1,MDL_NN4X2,MDL_NN4X3,MDL_NN4X4,MDL_RD3X3,MDL_RD3X6 + $ ,MDL_RD6X3,MDL_RD6X6,MDL_RL3X3,MDL_RL3X6,MDL_RL6X3,MDL_RL6X6 + $ ,MDL_RU3X3,MDL_RU3X6,MDL_RU6X3,MDL_RU6X6,MDL_UU1X1,MDL_UU1X2 + $ ,MDL_UU2X1,MDL_UU2X2,MDL_VV1X1,MDL_VV1X2,MDL_VV2X1,MDL_VV2X2 + $ ,MDL_TD3X3,MDL_TE3X3,MDL_TU3X3,MDL_YD3X3,MDL_YE3X3,MDL_YU3X3 + $ ,MDL_BB,MDL_CONJG__YU3X3,MDL_I1X33,MDL_CONJG__YD3X3,MDL_I10X33 $ ,MDL_I10X36,MDL_CONJG__RD3X6,MDL_I100X33,MDL_I100X36 $ ,MDL_CONJG__RD6X6,MDL_I100X63,MDL_I100X66,MDL_CONJG__RL3X6 $ ,MDL_I101X33,MDL_I101X36,MDL_CONJG__RL6X6,MDL_I101X63 diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/Parameters_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.mad/src/Parameters_MSSM_SLHA2.h index f0f67ac932..985d491474 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/src/Parameters_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_tt.mad/src/Parameters_MSSM_SLHA2.h @@ -58,7 +58,8 @@ namespace mg5amcCpu // (none) // Model parameters dependent on aS - //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) + //double mdl_sqrt__aS, G; // now computed event-by-event (running alphas #373) + //cxsmpl mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //cxsmpl GC_6, GC_51; // now computed event-by-event (running alphas #373) @@ -749,7 +750,7 @@ namespace mg5amcCpu // Model parameters dependent on aS //constexpr double mdl_sqrt__aS = constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373) //constexpr double G = 2. 
* mdl_sqrt__aS * constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373) - //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) + //constexpr cxsmpl mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //constexpr cxsmpl GC_6 = -G; // now computed event-by-event (running alphas #373) diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 0f5cabf3db..98681b7916 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.136 s +1 processes with 3 diagrams generated in 0.124 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -582,7 +582,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.423 s +ALOHA: aloha creates 2 routines in 0.144 s VVV1 FFV1 FFV1 @@ -597,7 +597,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m1.640s -user 0m1.275s -sys 0m0.067s -Code generation completed in 2 seconds +real 0m1.344s +user 0m1.272s +sys 0m0.066s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/Parameters_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.sa/src/Parameters_MSSM_SLHA2.h index f0f67ac932..985d491474 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/src/Parameters_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_tt.sa/src/Parameters_MSSM_SLHA2.h @@ -58,7 +58,8 @@ namespace mg5amcCpu // (none) // Model parameters dependent on aS - //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) + //double mdl_sqrt__aS, G; // now computed event-by-event (running alphas #373) + //cxsmpl mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //cxsmpl GC_6, GC_51; // now computed event-by-event (running alphas #373) @@ -749,7 +750,7 @@ namespace mg5amcCpu // Model parameters dependent on aS //constexpr double mdl_sqrt__aS = constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373) //constexpr double G = 2. * mdl_sqrt__aS * constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373) - //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) + //constexpr cxsmpl mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS //constexpr cxsmpl GC_6 = -G; // now computed event-by-event (running alphas #373) From 1339c8b5a282a83a8ab6836f67f1425f0b9dcb75 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 23:48:48 +0200 Subject: [PATCH 30/48] [jtmk2] adapt tput/throughputX.sh to the new executable names (e.g. 
no more gcheck.exe) --- epochX/cudacpp/tput/throughputX.sh | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index 71376e9b08..dd8cfaafcb 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -219,7 +219,7 @@ while [ "$1" != "" ]; do detailed=1 shift elif [ "$1" == "-gtest" ]; then - # For simplicity a gtest runTest.exe is executed for each build where check.exe or gcheck.exe is executed + # For simplicity a gtest runTest_xxx.exe is executed for each build where check_xxx.exe is executed gtest=1 shift ###elif [ "$1" == "-nofpe" ]; then @@ -393,9 +393,9 @@ for dir in $dirs; do for bbld in cuda hip none sse4 avx2 512y 512z; do if [ "${bblds}" == "${bbldsall}" ] || [ "${bblds/${bbld}}" != "${bblds}" ]; then if [ "${bbld}" == "cuda" ] || [ "${bbld}" == "hip" ]; then - exes="$exes $dir/build.${bbld}_${fptype}_inl${helinl}${hrdsuf}/gcheck.exe" + exes="$exes $dir/build.${bbld}_${fptype}_inl${helinl}${hrdsuf}/check_${bbld}.exe" else - exes="$exes $dir/build.${bbld}_${fptype}_inl${helinl}${hrdsuf}/check.exe" + exes="$exes $dir/build.${bbld}_${fptype}_inl${helinl}${hrdsuf}/check_cpp.exe" fi fi done @@ -479,7 +479,7 @@ function runExe() { if [ "${maketype}" == "-dryrun" ]; then return; fi pattern="Process|fptype_sv|OMP threads|EvtsPerSec\[MECalc|MeanMatrix|FP precision|TOTAL :" # Optionally add other patterns here for some specific configurations (e.g. 
clang) - if [ "${exe%%/gcheck*}" != "${exe}" ]; then pattern="${pattern}|EvtsPerSec\[Matrix"; fi + if [ "${exe%%/check_cuda*}" != "${exe}" ] || [ "${exe%%/check_hip*}" != "${exe}" ]; then pattern="${pattern}|EvtsPerSec\[Matrix"; fi pattern="${pattern}|Workflow" ###pattern="${pattern}|CUCOMPLEX" ###pattern="${pattern}|COMMON RANDOM|CURAND HOST \(CUDA" @@ -511,7 +511,6 @@ function runExe() { function cmpExe() { exe=$1 exef=${exe/\/check//fcheck} - exef=${exef/\/gcheck//fgcheck} argsf="2 64 2" args="--common -p ${argsf}" echo "cmpExe $exe $args" @@ -520,7 +519,7 @@ function cmpExe() { tmp=$(mktemp) me1=$(${exe} ${args} 2>${tmp} | grep MeanMatrix | awk '{print $4}'); cat ${tmp} me2=$(${exef} ${argsf} 2>${tmp} | grep Average | awk '{print $4}'); cat ${tmp} - if [ "${exe%%/gcheck*}" != "${exe}" ]; then tag="/GPU)"; else tag="/C++) "; fi + if [ "${exe%%/check_cuda*}" != "${exe}" ] || [ "${exe%%/check_hip*}" != "${exe}" ]; then tag="/GPU)"; else tag="/C++) "; fi echo -e "Avg ME (C++${tag} = ${me1}\nAvg ME (F77${tag} = ${me2}" if [ "${me2}" == "NaN" ]; then echo "ERROR! 
Fortran calculation (F77${tag} returned NaN" @@ -716,8 +715,7 @@ for exe in $exes; do fi if [ "${gtest}" == "1" ]; then echo "-------------------------------------------------------------------------" - exe2=${exe/gcheck/runTest} # first try to replace gcheck.exe - exe2=${exe2/check/runTest} # then try to replace check.exe instead + exe2=${exe/check/runTest} # replace check_xxx.exe by runTest_xxx.exe echo "runExe $exe2" $exe2 2>&1 | tail -1 if [ ${PIPESTATUS[0]} -ne "0" ]; then exit 1; fi From 9aa47d436bd286deb4921c5e7882969e9b7e92c7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 23:51:36 +0200 Subject: [PATCH 31/48] [jtmk2] adapt tput/throughputX.sh and tput/simdSym.sh to the new name CPPProcess_cpp.o replacing CPPProcess.o --- epochX/cudacpp/tput/simdSym.sh | 7 ++++--- epochX/cudacpp/tput/throughputX.sh | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/tput/simdSym.sh b/epochX/cudacpp/tput/simdSym.sh index 36ce0a868c..057b218a33 100755 --- a/epochX/cudacpp/tput/simdSym.sh +++ b/epochX/cudacpp/tput/simdSym.sh @@ -2,6 +2,7 @@ # Copyright (C) 2020-2023 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). # Created by: A. Valassi (Apr 2021) for the MG5aMC CUDACPP plugin. +# Further modified by: A. Valassi (2021-2024) for the MG5aMC CUDACPP plugin. 
#---------------------------------------------------------------------------------------------- # See http://sponce.web.cern.ch/sponce/CSC/slides/PracticalVectorization.booklet.pdf @@ -38,7 +39,7 @@ function countSyms() { printf "(%24s : %4d) " "'$sym'" $(cat $dump | awk '/^ +[[:xdigit:]]+:\t/' | cut -f3- | egrep "$sym" | wc -l) done; printf "\n" } -#dump=$1; shift; countSyms $* # for debugging (./simdSym.sh ./build.none/CPPProcess.o.objdump 'addsd.*xmm') +#dump=$1; shift; countSyms $* # for debugging (./simdSym.sh ./build.none/CPPProcess_cpp.o.objdump 'addsd.*xmm') function mainCountSyms() { # Command line arguments: select file @@ -108,7 +109,7 @@ mainListSyms $* function mainCompareSyms() { allFileSyms="" for avx in none sse4 avx2 512y 512z; do - file=./build.$avx/CPPProcess.o + file=./build.$avx/CPPProcess_cpp.o fileSymsRaw=$file.symlist.raw mainListSyms $file > $fileSymsRaw ls -l $fileSymsRaw @@ -117,7 +118,7 @@ function mainCompareSyms() { allSymList=all.symlist cat $allFileSyms | sort -u | awk -vf1= -vf2= -vf3=0 '{if ($1==f1 && $2==f2){f3+=$3} else {if(f1!=""){print f1,f2,f3};f1=$1;f2=$2;f3=$3}}END{print f1,f2,f3}' | sort -u -k 1,2 > $allSymList for avx in none sse4 avx2 512y 512z; do - file=./build.$avx/CPPProcess.o + file=./build.$avx/CPPProcess_cpp.o fileSymsRaw=$file.symlist.raw fileSyms=$file.symlist cat $fileSymsRaw | awk -vall=$allSymList -vfmt="%5s %15s %5d\n" -vtot=0 -veof=1 '{f1=$1; f2=$2; f3=$3; tot+=f3; while(f3>0){getline < all; if($1==f1 && $2==f2){printf fmt,$1,$2,f3; f3=0} else {printf fmt,$1,$2,0}}} END{while(getline < all){printf fmt,$1,$2,0};printf fmt,"TOTAL","",tot}' > $fileSyms diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index dd8cfaafcb..6bbf4948d9 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -699,7 +699,7 @@ for exe in $exes; do runExe $exe "$exeArgs" if [ "${exe%%/check*}" != "${exe}" ]; then if [ "${maketype}" != "-dryrun" ]; then - 
obj=${exe%%/check*}/CPPProcess.o; $scrdir/simdSymSummary.sh -stripdir ${obj} -dumptotmp # comment out -dumptotmp to keep full objdump + obj=${exe%%/check*}/CPPProcess_cpp.o; $scrdir/simdSymSummary.sh -stripdir ${obj} -dumptotmp # comment out -dumptotmp to keep full objdump fi if [ "${omp}" == "1" ]; then echo "-------------------------------------------------------------------------" From a19105bcd2fa70e911f0cb0d7d06d56bb4e479df Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 15 May 2024 23:59:31 +0200 Subject: [PATCH 32/48] [jtmk2] fix tput/throughputX.sh (the new name CPPProcess_cpp.o is sometimes CPPProcess_cuda.o) --- epochX/cudacpp/tput/throughputX.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index 6bbf4948d9..68b06b6bd6 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -699,7 +699,7 @@ for exe in $exes; do runExe $exe "$exeArgs" if [ "${exe%%/check*}" != "${exe}" ]; then if [ "${maketype}" != "-dryrun" ]; then - obj=${exe%%/check*}/CPPProcess_cpp.o; $scrdir/simdSymSummary.sh -stripdir ${obj} -dumptotmp # comment out -dumptotmp to keep full objdump + obj=${exe%%.exe}; obj=${obj/check/CPPProcess}.o; $scrdir/simdSymSummary.sh -stripdir ${obj} -dumptotmp # comment out -dumptotmp to keep full objdump fi if [ "${omp}" == "1" ]; then echo "-------------------------------------------------------------------------" From d73d17a2fd4698784846fe098389ee0ca96bd894 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 16 May 2024 00:05:56 +0200 Subject: [PATCH 33/48] [jtmk2] adapt tmad/madX.sh to the new executable names (e.g. 
no more gcheck.exe) --- epochX/cudacpp/tmad/madX.sh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/epochX/cudacpp/tmad/madX.sh b/epochX/cudacpp/tmad/madX.sh index 0926cf6223..eaeaf654fc 100755 --- a/epochX/cudacpp/tmad/madX.sh +++ b/epochX/cudacpp/tmad/madX.sh @@ -224,7 +224,7 @@ function getnevt() echo $nevt } -# Determine the appropriate CUDA/HIP grid dimension for the specific process (to run the fastest gcheck) +# Determine the appropriate CUDA/HIP grid dimension for the specific process (to run the fastest check_cuda or check_hip) function getgridmax() { if [ "${eemumu}" == "1" ]; then @@ -313,14 +313,14 @@ EOF echo ${tmp} } -# Run check.exe or gcheck.exe (depending on $1) and parse its output +# Run check_(cpp|cuda|hip).exe (depending on $1) and parse its output function runcheck() { - if [ "$1" == "" ] || [ "$2" != "" ]; then echo "Usage: runcheck "; exit 1; fi + if [ "$1" == "" ] || [ "$2" != "" ]; then echo "Usage: runcheck "; exit 1; fi cmd=$1 if [ "${cmd/gcheckmax128thr}" != "$cmd" ]; then txt="GCHECK(MAX128THR)" - cmd=${cmd/gcheckmax128thr/gcheck} # hack: run cuda/hip gcheck with tput fastest settings + cmd=${cmd/gcheckmax128thr/check_${backend}} # hack: run cuda/hip check with tput fastest settings cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl0_hrd0\/} nblk=$(getgridmax | cut -d ' ' -f1) nthr=$(getgridmax | cut -d ' ' -f2) @@ -328,7 +328,7 @@ function runcheck() (( nevt = nblk*nthr )) elif [ "${cmd/gcheckmax8thr}" != "$cmd" ]; then txt="GCHECK(MAX8THR)" - cmd=${cmd/gcheckmax8thr/gcheck} # hack: run cuda/hip gcheck with tput fastest settings + cmd=${cmd/gcheckmax8thr/check_${backend}} # hack: run cuda/hip check with tput fastest settings cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl0_hrd0\/} nblk=$(getgridmax | cut -d ' ' -f1) nthr=$(getgridmax | cut -d ' ' -f2) @@ -336,13 +336,14 @@ function runcheck() (( nevt = nblk*nthr )) elif [ "${cmd/gcheckmax}" != "$cmd" ]; then txt="GCHECK(MAX)" - cmd=${cmd/gcheckmax/gcheck} 
# hack: run cuda/hip gcheck with tput fastest settings + cmd=${cmd/gcheckmax/check_${backend}} # hack: run cuda/hip check with tput fastest settings cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl0_hrd0\/} nblk=$(getgridmax | cut -d ' ' -f1) nthr=$(getgridmax | cut -d ' ' -f2) (( nevt = nblk*nthr )) elif [ "${cmd/gcheck}" != "$cmd" ]; then txt="GCHECK($NLOOP)" + cmd=${cmd/gcheck/check_${backend}} cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl0_hrd0\/} nthr=32 (( nblk = NLOOP/nthr )) || true # integer division (NB: bash double parenthesis fails if the result is 0) @@ -351,6 +352,7 @@ function runcheck() nevt=$(getnevt) elif [ "${cmd/check}" != "$cmd" ]; then txt="CHECK($NLOOP)" + cmd=${cmd/check/check_cpp} cmd=${cmd/.\//.\/build.${backend}_${fptype}_inl0_hrd0\/} nthr=32 (( nblk = NLOOP/nthr )) || true # integer division (NB: bash double parenthesis fails if the result is 0) From 0595a7946a69f4d65978e9e4e94f7ed5a8824f30 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 16 May 2024 08:17:01 +0200 Subject: [PATCH 34/48] [jtmk] go back to itscrd90 logs for tput and tmad git checkout 79039c99c69fe62738c38d193cb7bf3d45bc011b tput/logs_* git checkout 522069ab8168d502ac1a9041956e0f949eedcce6 tmad/logs_* --- .../log_eemumu_mad_d_inl0_hrd0.txt | 432 ++++++++++------ .../log_eemumu_mad_f_inl0_hrd0.txt | 442 ++++++++++------ .../log_eemumu_mad_m_inl0_hrd0.txt | 422 +++++++++++----- .../log_ggtt_mad_d_inl0_hrd0.txt | 432 ++++++++++------ .../log_ggtt_mad_f_inl0_hrd0.txt | 436 ++++++++++------ .../log_ggtt_mad_m_inl0_hrd0.txt | 424 +++++++++++----- .../log_ggttg_mad_d_inl0_hrd0.txt | 448 +++++++++++------ .../log_ggttg_mad_f_inl0_hrd0.txt | 450 +++++++++++------ .../log_ggttg_mad_m_inl0_hrd0.txt | 448 +++++++++++------ .../log_ggttgg_mad_d_inl0_hrd0.txt | 448 +++++++++++------ .../log_ggttgg_mad_f_inl0_hrd0.txt | 448 +++++++++++------ .../log_ggttgg_mad_m_inl0_hrd0.txt | 448 +++++++++++------ .../log_ggttggg_mad_d_inl0_hrd0.txt | 438 ++++++++++------ 
.../log_ggttggg_mad_f_inl0_hrd0.txt | 442 ++++++++++------ .../log_ggttggg_mad_m_inl0_hrd0.txt | 438 ++++++++++------ .../log_gqttq_mad_d_inl0_hrd0.txt | 470 ++++++++++++----- .../log_gqttq_mad_f_inl0_hrd0.txt | 472 ++++++++++++----- .../log_gqttq_mad_m_inl0_hrd0.txt | 473 +++++++++++++----- .../log_heftggbb_mad_d_inl0_hrd0.txt | 440 ++++++++++------ .../log_heftggbb_mad_f_inl0_hrd0.txt | 112 ++--- .../log_heftggbb_mad_m_inl0_hrd0.txt | 446 +++++++++++------ .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 438 ++++++++++------ .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 440 ++++++++++------ .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 434 ++++++++++------ .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 74 +-- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 74 +-- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 76 +-- .../log_susyggtt_mad_d_inl0_hrd0.txt | 86 ++-- .../log_susyggtt_mad_f_inl0_hrd0.txt | 86 ++-- .../log_susyggtt_mad_m_inl0_hrd0.txt | 86 ++-- .../log_eemumu_mad_d_inl0_hrd0.txt | 253 ++++++---- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 261 ++++++---- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 239 +++++---- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 256 ++++++---- .../log_eemumu_mad_d_inl0_hrd1.txt | 249 +++++---- .../log_eemumu_mad_d_inl1_hrd0.txt | 253 ++++++---- .../log_eemumu_mad_d_inl1_hrd1.txt | 253 ++++++---- .../log_eemumu_mad_f_inl0_hrd0.txt | 263 ++++++---- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 269 ++++++---- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 249 +++++---- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 264 ++++++---- .../log_eemumu_mad_f_inl0_hrd1.txt | 263 ++++++---- .../log_eemumu_mad_f_inl1_hrd0.txt | 263 ++++++---- .../log_eemumu_mad_f_inl1_hrd1.txt | 263 ++++++---- .../log_eemumu_mad_m_inl0_hrd0.txt | 249 +++++---- .../log_eemumu_mad_m_inl0_hrd1.txt | 249 +++++---- .../log_ggtt_mad_d_inl0_hrd0.txt | 253 ++++++---- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 261 ++++++---- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 239 +++++---- 
.../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 256 ++++++---- .../log_ggtt_mad_d_inl0_hrd1.txt | 249 +++++---- .../log_ggtt_mad_d_inl1_hrd0.txt | 253 ++++++---- .../log_ggtt_mad_d_inl1_hrd1.txt | 249 +++++---- .../log_ggtt_mad_f_inl0_hrd0.txt | 267 +++++----- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 275 +++++----- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 259 ++++++---- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 270 +++++----- .../log_ggtt_mad_f_inl0_hrd1.txt | 267 +++++----- .../log_ggtt_mad_f_inl1_hrd0.txt | 267 +++++----- .../log_ggtt_mad_f_inl1_hrd1.txt | 267 +++++----- .../log_ggtt_mad_m_inl0_hrd0.txt | 253 ++++++---- .../log_ggtt_mad_m_inl0_hrd1.txt | 253 ++++++---- .../log_ggttg_mad_d_inl0_hrd0.txt | 278 +++++----- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 288 ++++++----- .../log_ggttg_mad_d_inl0_hrd1.txt | 278 +++++----- .../log_ggttg_mad_f_inl0_hrd0.txt | 292 ++++++----- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 302 ++++++----- .../log_ggttg_mad_f_inl0_hrd1.txt | 292 ++++++----- .../log_ggttg_mad_m_inl0_hrd0.txt | 274 +++++----- .../log_ggttg_mad_m_inl0_hrd1.txt | 274 +++++----- .../log_ggttgg_mad_d_inl0_hrd0.txt | 278 +++++----- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 288 ++++++----- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 262 ++++++---- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 283 ++++++----- .../log_ggttgg_mad_d_inl0_hrd1.txt | 278 +++++----- .../log_ggttgg_mad_d_inl1_hrd0.txt | 286 ++++++----- .../log_ggttgg_mad_d_inl1_hrd1.txt | 282 ++++++----- .../log_ggttgg_mad_f_inl0_hrd0.txt | 294 ++++++----- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 304 ++++++----- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 288 ++++++----- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 299 ++++++----- .../log_ggttgg_mad_f_inl0_hrd1.txt | 292 ++++++----- .../log_ggttgg_mad_f_inl1_hrd0.txt | 292 ++++++----- .../log_ggttgg_mad_f_inl1_hrd1.txt | 292 ++++++----- .../log_ggttgg_mad_m_inl0_hrd0.txt | 274 +++++----- .../log_ggttgg_mad_m_inl0_hrd1.txt | 274 +++++----- 
.../log_ggttggg_mad_d_inl0_hrd0.txt | 274 +++++----- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 284 ++++++----- .../log_ggttggg_mad_d_inl0_hrd1.txt | 274 +++++----- .../log_ggttggg_mad_f_inl0_hrd0.txt | 290 ++++++----- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 300 ++++++----- .../log_ggttggg_mad_f_inl0_hrd1.txt | 290 ++++++----- .../log_ggttggg_mad_m_inl0_hrd0.txt | 274 +++++----- .../log_ggttggg_mad_m_inl0_hrd1.txt | 274 +++++----- .../log_gqttq_mad_d_inl0_hrd0.txt | 243 +++++++-- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 256 ++++++++-- .../log_gqttq_mad_d_inl0_hrd1.txt | 243 +++++++-- .../log_gqttq_mad_f_inl0_hrd0.txt | 243 +++++++-- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 256 ++++++++-- .../log_gqttq_mad_f_inl0_hrd1.txt | 243 +++++++-- .../log_gqttq_mad_m_inl0_hrd0.txt | 243 +++++++-- .../log_gqttq_mad_m_inl0_hrd1.txt | 243 +++++++-- .../log_heftggbb_mad_d_inl0_hrd0.txt | 249 +++++---- .../log_heftggbb_mad_d_inl0_hrd1.txt | 249 +++++---- .../log_heftggbb_mad_f_inl0_hrd0.txt | 269 ++++++---- .../log_heftggbb_mad_f_inl0_hrd1.txt | 269 ++++++---- .../log_heftggbb_mad_m_inl0_hrd0.txt | 249 +++++---- .../log_heftggbb_mad_m_inl0_hrd1.txt | 249 +++++---- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 278 +++++----- .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 278 +++++----- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 294 ++++++----- .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 294 ++++++----- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 274 +++++----- .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 274 +++++----- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 249 +++++---- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 249 +++++---- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 259 ++++++---- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 259 ++++++---- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 253 ++++++---- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 253 ++++++---- .../log_susyggtt_mad_d_inl0_hrd0.txt | 257 ++++++---- .../log_susyggtt_mad_d_inl0_hrd1.txt | 261 ++++++---- 
.../log_susyggtt_mad_f_inl0_hrd0.txt | 267 +++++----- .../log_susyggtt_mad_f_inl0_hrd1.txt | 267 +++++----- .../log_susyggtt_mad_m_inl0_hrd0.txt | 253 ++++++---- .../log_susyggtt_mad_m_inl0_hrd1.txt | 253 ++++++---- 126 files changed, 22557 insertions(+), 13860 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index bf4ac49169..80212fc9f8 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-15_17:55:49 +DATE: 2024-05-14_21:29:23 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.5230s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5169s - [COUNTERS] Fortran MEs ( 1 ) : 0.0062s for 8192 events => throughput is 1.33E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7490s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7405s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.58E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1340s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1281s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1860s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1773s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.42E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2770s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2142s - [COUNTERS] Fortran MEs ( 1 ) : 0.0627s for 90112 events => throughput is 1.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3906s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2991s + [COUNTERS] Fortran MEs ( 1 ) : 0.0915s for 90112 events => throughput is 9.84E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1533s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1473s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 8192 events => throughput is 1.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1841s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 8192 events => throughput is 1.18E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661545E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000780E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2860s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2205s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0655s for 90112 events => throughput is 1.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3806s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3053s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0753s for 90112 events => throughput is 1.20E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000780E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.405897e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.198292e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.425037e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.189902e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1351s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1826s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.86E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715404661518E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,8 +233,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2553s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2164s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0390s for 90112 events => throughput is 2.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3505s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3034s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0470s for 90112 events => throughput is 1.92E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +255,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.392866e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.910605e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.439421e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.993478e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,8 +276,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,13 +285,13 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1328s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1303s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.29E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1839s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.39E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2409s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2136s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 90112 events => throughput is 3.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3398s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3038s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0360s for 90112 events => throughput is 2.50E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.466511e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.544384e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.546981e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.669137e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.1836s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1804s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3423s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0343s for 90112 events => throughput is 2.62E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.714748e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.793958e+06 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +428,104 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.1866s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1825s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 2.00E+06 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3507s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3069s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0439s for 90112 events => throughput is 2.05E+06 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.062300e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.177526e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.4136s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4131s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.75E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and hip (9.3382715404661545E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715404661532E-002) differ by less than 3E-14 (0.0) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.4962s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4918s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 90112 events => throughput is 2.02E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7354s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7304s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.81E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and hip (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! 
xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.173800e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.128413e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.535624e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.930164e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.258734e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.001091e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.870473e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.445549e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.239388e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.976571e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.963945e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.020573e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.186996e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.977453e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.568736e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.141008e+08 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 1284773939..2651e2b252 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-15_17:56:01 +DATE: 2024-05-14_21:29:39 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.5097s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5038s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7223s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7137s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.54E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1360s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1301s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1759s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.58E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2776s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2152s - [COUNTERS] Fortran MEs ( 1 ) : 0.0624s for 90112 events => throughput is 1.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3930s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3006s + [COUNTERS] Fortran MEs ( 1 ) : 0.0924s for 90112 events => throughput is 9.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382701684199335E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1382s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1330s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 8192 events => throughput is 1.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1878s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382701684199335E-002) differ by less than 4E-4 (1.4692721372888684e-07) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382703205998396E-002) differ by less than 4E-4 (1.306308462512007e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515588842633111E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.4155s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0570s for 90112 events => throughput is 1.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3826s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3078s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0749s for 90112 events => throughput is 1.20E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515588842633111E-002) differ by less than 4E-4 (1.439903947186849e-07) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515590123565249E-002) differ by less than 4E-4 (1.2999352305698153e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.649401e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.250685e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.656662e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.267147e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382719831741665E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1322s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1301s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1816s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.97E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382719831741665E-002) differ by less than 4E-4 (4.740791825774693e-08) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700723828302E-002) differ by less than 4E-4 (1.5721146218172777e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515606481761602E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2364s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0232s for 90112 events => throughput is 3.88E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3282s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2991s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0292s for 90112 events => throughput is 3.09E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515606481761602E-002) differ by less than 4E-4 (4.875410031246474e-08) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587612890761E-002) differ by less than 4E-4 (1.5742791048545257e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.040179e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.188800e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.136369e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.348517e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382719700521907E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1328s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1310s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.61E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1838s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1814s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.51E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382719700521907E-002) differ by less than 4E-4 (4.6002735842876064e-08) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700679354239E-002) differ by less than 4E-4 (1.576877179942926e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515606480805645E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2339s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2143s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 90112 events => throughput is 4.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3291s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3023s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 90112 events => throughput is 3.36E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515606480805645E-002) differ by less than 4E-4 (4.874365444607065e-08) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002) differ by less than 4E-4 (1.573566908996682e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.888730e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.394421e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.668083e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.1842s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1818s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.35E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700679354239E-002) differ by less than 4E-4 (1.576877179942926e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3329s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3063s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0265s for 90112 events => throughput is 3.40E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002) differ by less than 4E-4 (1.573566908996682e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.309620e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.024990e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.369965e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.1855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1829s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.08E+06 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382704335459282E-002) differ by less than 4E-4 (1.1853587900123586e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! 
Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3340s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3060s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 90112 events => throughput is 3.22E+06 events/s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515591296252558E-002) differ by less than 4E-4 (1.1717945325173673e-07) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.342049e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.540514e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382704338101225E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.4099s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4096s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0003s for 8192 events => throughput is 2.60E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6040s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6035s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events 
=> throughput is 1.64E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and hip (9.3382704338101225E-002) differ by less than 4E-4 (1.1850758729892164e-07) +OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382706077425631E-002) differ by less than 4E-4 (9.988182347875352e-08) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515591361999701E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.5245s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5216s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 90112 events => throughput is 3.12E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7354s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7306s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.91E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and hip (9.1515591361999701E-002) differ by less than 4E-4 (1.1646102771045719e-07) +OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515592892887687E-002) differ by less than 4E-4 (9.973286385633884e-08) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.738506e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.575616e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.062982e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.796191e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.296230e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.392977e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.605708e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.031140e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.343067e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.306294e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.717215e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.253384e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.103812e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.748696e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.678644e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.478402e+08 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 8615124c0e..c72828112e 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-15_17:56:12 +DATE: 2024-05-14_21:29:56 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.5022s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4963s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7117s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.52E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1348s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1290s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1854s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1768s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.55E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2788s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2161s - [COUNTERS] Fortran MEs ( 1 ) : 0.0627s for 90112 events => throughput is 1.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3927s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3002s + [COUNTERS] Fortran MEs ( 1 ) : 0.0925s for 90112 events => throughput is 9.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1396s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1336s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1918s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1847s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0071s for 8192 events => throughput is 1.15E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.717646025412023e-10) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701395E-002) differ by less than 2E-4 (1.7176482458580722e-10) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,8 +157,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3295s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0658s for 90112 events => throughput is 1.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3882s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3114s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0768s for 90112 events => throughput is 1.17E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +179,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.402038e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.190112e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.425726e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.202467e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,8 +200,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,13 +209,13 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1337s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1302s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1842s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.97E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.717646025412023e-10) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.7176438049659737e-10) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,8 +233,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2536s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2157s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0380s for 90112 events => throughput is 2.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3510s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3054s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0456s for 90112 events => throughput is 1.98E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +255,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.462735e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.027888e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.509349e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.074464e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,8 +276,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,13 +285,13 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1318s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1292s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.20E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1854s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1821s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.47E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484913930753692e-10) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,8 +309,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2462s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2179s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0282s for 90112 events => throughput is 3.19E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3387s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3028s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0359s for 90112 events => throughput is 2.51E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,22 +331,92 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.347503e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.529645e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.432398e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.689842e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.1848s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1817s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.69E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3386s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3043s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0343s for 90112 events => throughput is 2.63E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.665204e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.827156e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +428,104 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715392009222E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.4143s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4138s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.78E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.1860s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1821s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and hip (9.3382715392009222E-002) differ by less than 2E-4 (1.3548862032308762e-10) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3469s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3049s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0420s for 90112 events => throughput is 2.14E+06 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.187638e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.271730e+06 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.6078s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.62E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715392009194E-002) differ by less than 2E-4 (1.3548906441229747e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.5149s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5106s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 90112 events => throughput is 2.07E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7346s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7297s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.83E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (9.1515602020000766E-002) and hip (9.1515602021089631E-002) differ by less than 2E-4 (1.1898038110302878e-11) +OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602021089631E-002) differ by less than 2E-4 (1.1898038110302878e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.170122e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.048447e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.560446e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.874128e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.335116e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.969250e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.884712e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.511559e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.310770e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.951647e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.943226e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.090814e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.179111e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.977042e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.566543e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.160627e+08 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index b04658a765..d1f7d6c917 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-15_17:56:24 +DATE: 2024-05-14_21:30:13 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 2.5934s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5646s - [COUNTERS] Fortran MEs ( 1 ) : 0.0288s for 8192 events => throughput is 2.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8150s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7712s + [COUNTERS] Fortran MEs ( 1 ) : 0.0439s for 8192 events => throughput is 1.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.5948s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5662s - [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4153s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3722s + [COUNTERS] Fortran MEs ( 1 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/val [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3563s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0428s - [COUNTERS] Fortran MEs ( 1 ) : 0.3135s for 90112 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7541s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2769s + [COUNTERS] Fortran MEs ( 1 ) : 0.4771s for 90112 events => throughput is 1.89E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3663s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3342s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4517s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4124s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0393s for 8192 events => throughput is 2.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094184803756640) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4118s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0583s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3535s for 90112 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8277s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3920s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4358s for 90112 events => throughput is 2.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989099) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.609025e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.103817e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.618885e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.114937e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756619] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3247s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3068s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0179s for 8192 events => throughput is 4.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4324s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4093s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.54E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094184803756619) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756640) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989085] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.2449s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0475s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1974s for 90112 events => throughput is 4.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5932s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2520s for 90112 events => throughput is 3.58E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989085) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989106) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.631092e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.529919e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.635903e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.626084e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3099s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2996s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 8192 events => throughput is 7.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4008s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3871s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0137s for 8192 events => throughput is 5.97E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094184803756640) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.1527s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0393s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1135s for 90112 events => throughput is 7.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4953s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3368s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1584s for 90112 events => throughput is 5.69E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989114) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.251097e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.599755e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.307881e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.587845e+05 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.3964s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3835s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0129s for 8192 events => throughput is 6.33E+05 events/s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.4805s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3407s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1399s for 90112 events => throughput is 6.44E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.631615e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.759324e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,8 +428,84 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.4140s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3923s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0216s for 8192 events => throughput is 3.78E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.5804s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3435s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2370s for 90112 events => throughput is 3.80E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.786863e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.873596e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -367,19 +513,19 @@ Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_c [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.5722s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5714s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.10E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and hip (47.094184803756640) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.094184803756640) and cuda (47.094184803756640) differ by less than 3E-14 (0.0) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,8 +537,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -400,56 +546,58 @@ Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3209s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3132s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 90112 events => throughput is 1.17E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7757s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7691s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.35E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and hip (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (47.105695279989114) and cuda (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.313283e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.922671e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.008251e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.622319e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.760333e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.182413e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.753176e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.074544e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.804807e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.169853e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.946696e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.157576e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.747216e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.187954e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.149230e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.069859e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 8394c1f832..e1be3248ae 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-15_17:56:47 +DATE: 2024-05-14_21:30:39 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.5837s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5552s - [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7734s + [COUNTERS] Fortran MEs ( 1 ) : 0.0435s for 8192 events => throughput is 1.88E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3175s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2888s - [COUNTERS] Fortran MEs ( 1 ) : 0.0287s for 8192 events => throughput is 2.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4189s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3755s + [COUNTERS] Fortran MEs ( 1 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/val [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3378s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0255s - [COUNTERS] Fortran MEs ( 1 ) : 0.3123s for 90112 events => throughput is 2.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7459s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2697s + [COUNTERS] Fortran MEs ( 1 ) : 0.4762s for 90112 events => throughput is 1.89E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094178241446492] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094179780921394] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3446s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3171s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0275s for 8192 events => throughput is 2.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4442s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4078s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0365s for 8192 events => throughput is 2.25E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094178241446492) differ by less than 4E-4 (1.3934438314322506e-07) +OK! xsec from fortran (47.094184803756640) and cpp (47.094179780921394) differ by less than 4E-4 (1.0665510541407741e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105686930681671] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105688579298537] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3548s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0514s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3035s for 90112 events => throughput is 2.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7649s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3605s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4044s for 90112 events => throughput is 2.23E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105686930681671) differ by less than 4E-4 (1.7724624157278157e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105688579298537) differ by less than 4E-4 (1.4224799227413598e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.087443e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.289579e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.095881e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.288922e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094176373190514] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094175850060040] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3143s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3014s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0129s for 8192 events => throughput is 6.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4010s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.26E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094176373190514) differ by less than 4E-4 (1.7901501314643298e-07) +OK! xsec from fortran (47.094184803756640) and cpp (47.094175850060040) differ by less than 4E-4 (1.9012318908107062e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105685173093654] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105684763984058] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.2445s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1030s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1416s for 90112 events => throughput is 6.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5082s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3361s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1720s for 90112 events => throughput is 5.24E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105685173093654) differ by less than 4E-4 (2.1455782361901043e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105684763984058) differ by less than 4E-4 (2.2324275217311396e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.456336e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.210735e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.800046e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.237575e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094174474272364] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3239s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3176s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3886s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3803s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.85E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094174474272364) differ by less than 4E-4 (2.1933672500473733e-07) +OK! xsec from fortran (47.094184803756640) and cpp (47.094173652938650) differ by less than 4E-4 (2.3677696170398832e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105684585116684] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.1025s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0337s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0687s for 90112 events => throughput is 1.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.4256s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3332s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0924s for 90112 events => throughput is 9.76E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105684585116684) differ by less than 4E-4 (2.2703990176786704e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ by less than 4E-4 (2.384278946498952e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.370612e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.818330e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.013113e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.3954s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3877s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 8192 events => throughput is 1.07E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.094184803756640) and cpp (47.094173652938650) differ by less than 4E-4 (2.3677696170398832e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.4218s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3364s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0854s for 90112 events => throughput is 1.05E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ by less than 4E-4 (2.384278946498952e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.068593e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.381931e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.099468e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! 
Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094178213275804] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.3957s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3845s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.32E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.094184803756640) and cpp (47.094178213275804) differ by less than 4E-4 (1.3994256109484127e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! 
Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105688407939567] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.4559s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3326s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1233s for 90112 events => throughput is 7.31E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cpp (47.105688407939567) differ by less than 4E-4 (1.4588574703822133e-07) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.366839e+05 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.492192e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094176770070867] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184344050284] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.5844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5840s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0004s for 8192 events => throughput is 2.10E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8062s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8057s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => 
throughput is 1.49E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and hip (47.094176770070867) differ by less than 4E-4 (1.705876382374072e-07) +OK! xsec from fortran (47.094184803756640) and cuda (47.094184344050284) differ by less than 4E-4 (9.761425112664313e-09) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105687115703695] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105694586476879] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3211s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3174s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 90112 events => throughput is 2.45E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7618s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7560s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 90112 events => throughput is 1.56E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and hip (47.105687115703695) differ by less than 4E-4 (1.733184357144424e-07) +OK! xsec from fortran (47.105695279989114) and cuda (47.105694586476879) differ by less than 4E-4 (1.4722471020078842e-08) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.990316e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.250794e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.129345e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.270403e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.057346e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.044831e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.981637e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.775580e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.063827e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.128987e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.077523e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.894720e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.183964e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.630183e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.370435e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.416620e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 5052fa8251..547b1e3490 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-15_17:57:05 +DATE: 2024-05-14_21:31:05 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.5865s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5579s - [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8179s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7740s + [COUNTERS] Fortran MEs ( 1 ) : 0.0438s for 8192 events => throughput is 1.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3179s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2893s - [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4094s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s + [COUNTERS] Fortran MEs ( 1 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/val [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3405s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0280s - [COUNTERS] Fortran MEs ( 1 ) : 0.3124s for 90112 events => throughput is 2.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7458s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2700s + [COUNTERS] Fortran MEs ( 1 ) : 0.4758s for 90112 events => throughput is 1.89E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,8 +124,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,13 +133,13 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3542s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3211s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0331s for 8192 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4517s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4119s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0398s for 8192 events => throughput is 2.06E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428942997143e-08) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,8 +157,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5155s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1517s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3638s for 90112 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7993s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3599s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4394s for 90112 events => throughput is 2.05E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +179,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006634) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.531708e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.083387e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.538603e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.080671e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186141863908] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3250s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3072s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0178s for 8192 events => throughput is 4.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4168s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3942s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0226s for 8192 events => throughput is 3.63E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094186141863908) differ by less than 2E-4 (2.8413429165041748e-08) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,8 +233,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.2418s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0452s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1966s for 90112 events => throughput is 4.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5919s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3452s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2467s for 90112 events => throughput is 3.65E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +255,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006626) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.681520e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.684790e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.519243e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.710839e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186193208834] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3088s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2987s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 8192 events => throughput is 8.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4003s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3863s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.84E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094186193208834) differ by less than 2E-4 (2.9503689491505725e-08) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696667630852] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.1461s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0355s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1106s for 90112 events => throughput is 8.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4924s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3396s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1528s for 90112 events => throughput is 5.90E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105696667630852) differ by less than 2E-4 (2.9458046002517335e-08) +OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.429153e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.936975e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.506856e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.056847e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.3980s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3847s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.17E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.4785s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3422s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1363s for 90112 events => throughput is 6.61E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.654373e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.774862e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +428,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184798437837] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.5745s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5738s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.17E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.4158s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3952s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and hip (47.094184798437837) differ by less than 2E-4 (1.1293943558143837e-10) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,8 +461,84 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.5859s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3571s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2288s for 90112 events => throughput is 3.94E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.944890e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.998332e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.8121s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8115s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (47.094184803756640) and cuda (47.094184798437830) differ by less than 2E-4 (1.1293987967064822e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -400,56 +546,58 @@ Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3197s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3120s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 90112 events => throughput is 1.18E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7651s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7584s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.35E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (47.105695279989114) and hip (47.105695279068492) differ by less than 2E-4 (1.954369999168648e-11) +OK! xsec from fortran (47.105695279989114) and cuda (47.105695279068492) differ by less than 2E-4 (1.954369999168648e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.359184e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.005307e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.029386e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.597005e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.791869e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.161473e+07 ) sec^-1 *** 
EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.800661e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.067591e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.806095e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.173443e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.026202e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.146779e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.778718e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.155242e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.169422e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.034637e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 46571ae40b..20b4910bf1 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' 
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-15_17:57:22 +DATE: 2024-05-14_21:31:32 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.5675s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3642s - [COUNTERS] Fortran MEs ( 1 ) : 0.2033s for 8192 events => throughput is 4.03E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7043s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3683s + [COUNTERS] Fortran MEs ( 1 ) : 0.3360s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4587s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2556s - [COUNTERS] Fortran MEs ( 1 ) : 0.2031s for 8192 events => throughput is 4.03E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6655s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3301s + [COUNTERS] Fortran MEs ( 1 ) : 0.3353s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 3.4242s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1958s - [COUNTERS] Fortran MEs ( 1 ) : 2.2284s for 90112 events => throughput is 4.04E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.2767s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5820s + [COUNTERS] Fortran MEs ( 1 ) : 3.6947s for 90112 events => throughput is 2.44E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354763] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.8260s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2823s for 8192 events => throughput is 2.90E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0164s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6720s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3444s for 8192 events => throughput is 2.38E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317668354763) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252514E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 4.5846s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4896s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.0950s for 90112 events => throughput is 2.91E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.6982s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9128s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7854s for 90112 events => throughput is 2.38E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239236471252514E-002) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.986081e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.444585e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.975460e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.447688e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354515] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5359s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3966s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1392s for 8192 events => throughput is 5.88E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6808s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5022s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1786s for 8192 events => throughput is 4.59E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317668354515) differ by less than 3E-14 (2.475797344914099e-14) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607748863) differ by less than 3E-14 (2.453592884421596e-14) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252514E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 2.8730s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3402s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.5329s for 90112 events => throughput is 5.88E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 3.7507s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7497s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.0010s for 90112 events => throughput is 4.50E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239236471252514E-002) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.083131e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.665141e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.089350e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.657093e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354763] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3912s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3256s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0657s for 8192 events => throughput is 1.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5068s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4164s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0904s for 8192 events => throughput is 9.06E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317668354763) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 1.9916s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7206s for 90112 events => throughput is 1.25E+05 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.6510s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6594s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9915s for 90112 events => throughput is 9.09E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239236471252555E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.257360e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.408887e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.495539e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.4848s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4059s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0789s for 8192 events => throughput is 1.04E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.5315s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6496s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8819s for 90112 events => throughput is 1.02E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.069797e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.256779e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.059648e+05 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.5571s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4421s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1150s for 8192 events => throughput is 7.13E+04 events/s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.9579s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6912s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2666s for 90112 events => throughput is 7.11E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.192271e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.202521e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354760] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5743s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5667s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 8192 events => throughput is 1.07E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7731s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7677s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s -*** (3-hip) Compare 
MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and hip (0.10112317668354760) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748607749111) differ by less than 3E-14 (0.0) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 1.5845s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5009s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0836s for 90112 events => throughput is 1.08E+06 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.0321s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0091s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 90112 events => throughput is 3.91E+06 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and hip (7.9239236471252555E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481932717736E-002) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.115270e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.630446e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.136565e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.998102e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.679526e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.885438e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.304026e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.245221e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.674473e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.903636e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.843441e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.255324e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.667439e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.895996e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.438819e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.770834e+06 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 5ac8280e80..2d40c641c2 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' 
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-15_17:57:53 +DATE: 2024-05-14_21:32:16 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.4838s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2803s - [COUNTERS] Fortran MEs ( 1 ) : 0.2035s for 8192 events => throughput is 4.03E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7000s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3644s + [COUNTERS] Fortran MEs ( 1 ) : 0.3356s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4590s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2559s - [COUNTERS] Fortran MEs ( 1 ) : 0.2030s for 8192 events => throughput is 4.03E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6615s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3271s + [COUNTERS] Fortran MEs ( 1 ) : 0.3345s for 8192 events => throughput is 2.45E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 3.4298s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2024s - [COUNTERS] Fortran MEs ( 1 ) : 2.2274s for 90112 events => throughput is 4.05E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.2699s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5798s + [COUNTERS] Fortran MEs ( 1 ) : 3.6901s for 90112 events => throughput is 2.44E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112291597608296] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112722621426752] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7725s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5112s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2613s for 8192 events => throughput is 3.14E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9861s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6530s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3330s for 8192 events => throughput is 2.46E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112291597608296) differ by less than 4E-4 (2.5781178285555484e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112722621426752) differ by less than 4E-4 (2.569659680817793e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239221732791437E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 4.3191s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4552s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8638s for 90112 events => throughput is 3.15E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238468310179624E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.5670s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8948s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6722s for 90112 events => throughput is 2.45E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239221732791437E-002) differ by less than 4E-4 (1.8599953477416165e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238468310179624E-002) differ by less than 4E-4 (1.719182115555995e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.247317e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.527255e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.257409e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.545228e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112290421591680] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112720710186394] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3320s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0756s for 8192 events => throughput is 1.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5309s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4314s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0996s for 8192 events => throughput is 8.23E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112290421591680) differ by less than 4E-4 (2.6944132867079418e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112720710186394) differ by less than 4E-4 (2.758652844936371e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239212368085274E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 2.1126s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2753s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8372s for 90112 events => throughput is 1.08E+05 events/s + [XSECTION] Cross section = 0.07924 [7.9238454786658835E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.7616s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6665s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0951s for 90112 events => throughput is 8.23E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239212368085274E-002) differ by less than 4E-4 (3.0418222529693395e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238454786658835E-002) differ by less than 4E-4 (3.4258681169685445e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.096350e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.414768e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.092149e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.429788e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112291415112837] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3272s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2931s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0341s for 8192 events => throughput is 2.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4195s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3735s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0461s for 8192 events => throughput is 1.78E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112291415112837) differ by less than 4E-4 (2.5961646764605106e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721766950902) differ by less than 4E-4 (2.654154597325764e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239211617250407E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 1.6120s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2368s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3752s for 90112 events => throughput is 2.40E+05 events/s + [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.1275s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6152s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5123s for 90112 events => throughput is 1.76E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239211617250407E-002) differ by less than 4E-4 (3.136577692020026e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002) differ by less than 4E-4 (3.5585866953180556e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.453372e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.835666e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.794023e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.4095s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3681s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721766950902) differ by less than 4E-4 (2.654154597325764e-06) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.0664s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6096s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4568s for 90112 events => throughput is 1.97E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002) differ by less than 4E-4 (3.5585866953180556e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.989836e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.459554e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.035089e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112723387847480] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.4388s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3836s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0552s for 8192 events => throughput is 1.48E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112723387847480) differ by less than 4E-4 (2.4938721023826105e-06) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238464410949921E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.2419s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6268s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6150s for 90112 events => throughput is 1.47E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238464410949921E-002) differ by less than 4E-4 (2.211270000440635e-07) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.504406e+05 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.497982e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112292787307366] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112726034625694] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5497s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5477s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7631s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7623s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events 
=> throughput is 9.63E+06 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and hip (0.10112292787307366) differ by less than 4E-4 (2.4604693221741414e-06) +OK! xsec from fortran (0.10112748607749111) and cuda (0.10112726034625694) differ by less than 4E-4 (2.2321452152196386e-06) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239222545537072E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 1.5151s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4928s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 90112 events => throughput is 4.05E+06 events/s + [XSECTION] Cross section = 0.07924 [7.9238473828077680E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.0121s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0022s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 90112 events => throughput is 9.09E+06 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and hip (7.9239222545537072E-002) differ by less than 4E-4 (1.7574267630049434e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238473828077680E-002) differ by less than 4E-4 (1.0228161673175862e-07) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.616507e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.300251e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.525364e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.847750e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.469095e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.699331e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.085807e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.419025e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.437114e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.716824e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.635195e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.489710e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.395969e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.582209e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.280775e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.625187e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index b81dae3d11..799e6a6c40 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-15_17:58:20 +DATE: 2024-05-14_21:32:55 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.4823s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2798s - [COUNTERS] Fortran MEs ( 1 ) : 0.2025s for 8192 events => throughput is 4.05E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7032s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3673s + [COUNTERS] Fortran MEs ( 1 ) : 0.3358s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4570s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2547s - [COUNTERS] Fortran MEs ( 1 ) : 0.2023s for 8192 events => throughput is 4.05E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6642s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3295s + [COUNTERS] Fortran MEs ( 1 ) : 0.3347s for 8192 events => throughput is 2.45E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 3.4267s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2006s - [COUNTERS] Fortran MEs ( 1 ) : 2.2261s for 90112 events => throughput is 4.05E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.2826s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5869s + [COUNTERS] Fortran MEs ( 1 ) : 3.6957s for 90112 events => throughput is 2.44E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317761225882] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.8338s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5435s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2903s for 8192 events => throughput is 2.82E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0162s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6692s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3470s for 8192 events => throughput is 2.36E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317761225882) differ by less than 2E-4 (9.183959592817814e-09) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748700702684) differ by less than 2E-4 (9.191721828116783e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239237217958461E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 4.6852s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4916s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.1935s for 90112 events => throughput is 2.82E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.7567s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9108s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.8459s for 90112 events => throughput is 2.34E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239237217958461E-002) differ by less than 2E-4 (9.4234364755863e-09) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482679400354E-002) differ by less than 2E-4 (9.423232416594374e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.908465e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.412345e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.909870e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.420469e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317763556192] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5282s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3911s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1371s for 8192 events => throughput is 5.97E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6728s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4982s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1746s for 8192 events => throughput is 4.69E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317763556192) differ by less than 2E-4 (9.41440236879032e-09) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748702805033) differ by less than 2E-4 (9.399612865834683e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239237221421968E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 2.8489s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3381s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.5108s for 90112 events => throughput is 5.96E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 3.6634s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7403s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9231s for 90112 events => throughput is 4.69E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239237221421968E-002) differ by less than 2E-4 (9.467145956065792e-09) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482683055667E-002) differ by less than 2E-4 (9.469362849401364e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.149842e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.781863e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.133030e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.781998e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317741957558] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3885s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3223s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0662s for 8192 events => throughput is 1.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5053s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4166s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0887s for 8192 events => throughput is 9.24E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317741957558) differ by less than 2E-4 (7.278528668663853e-09) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239237072275287E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 1.9982s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2713s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7269s for 90112 events => throughput is 1.24E+05 events/s + [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.6573s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6705s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9868s for 90112 events => throughput is 9.13E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239237072275287E-002) differ by less than 2E-4 (7.584913142011374e-09) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.271467e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.325042e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.300112e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.4897s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4093s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0805s for 8192 events => throughput is 1.02E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.5215s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6536s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8679s for 90112 events => throughput is 1.04E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.077283e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.275740e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.075927e+05 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.5656s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4472s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1184s for 8192 events => throughput is 6.92E+04 events/s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748700265108) differ by less than 2E-4 (9.148451995955043e-09) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 3.0008s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6905s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3103s for 90112 events => throughput is 6.88E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482666076374E-002) differ by less than 2E-4 (9.255082034087536e-09) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.041319e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.003038e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317662375726] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5606s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5530s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7759s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7704s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s -*** (3-hip) Compare 
MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and hip (0.10112317662375726) differ by less than 2E-4 (5.9126292750733e-10) +OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748601943165) differ by less than 2E-4 (5.74121417074025e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236476482192E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 1.5836s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4997s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0839s for 90112 events => throughput is 1.07E+06 events/s + [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.0297s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0066s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 90112 events => throughput is 3.90E+06 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and hip (7.9239236476482192E-002) differ by less than 2E-4 (6.599809587726213e-11) +OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481937154381E-002) differ by less than 2E-4 (5.5991211667105745e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.119340e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.633846e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.150503e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.090737e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.676250e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.829640e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.301160e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.232989e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.678384e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.835721e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.842635e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.245871e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.663031e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.822524e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.428966e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.726985e+06 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index e420a06bd8..324ee1a9b7 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-15_17:58:50 +DATE: 2024-05-14_21:33:38 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 2.8398s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3457s - [COUNTERS] Fortran MEs ( 1 ) : 2.4942s for 8192 events => throughput is 3.28E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7297s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3579s + [COUNTERS] Fortran MEs ( 1 ) : 4.3718s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7685s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2715s - [COUNTERS] Fortran MEs ( 1 ) : 2.4969s for 8192 events => throughput is 3.28E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7277s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3553s + [COUNTERS] Fortran MEs ( 1 ) : 4.3724s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914656E-004] fbridge_mode=0 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 29.0297s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5846s - [COUNTERS] Fortran MEs ( 1 ) : 27.4451s for 90112 events => throughput is 3.28E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 50.2700s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1537s + [COUNTERS] Fortran MEs ( 1 ) : 48.1163s for 90112 events => throughput is 1.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579739E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612510102372E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 7.8276s - [COUNTERS] Fortran Overhead ( 0 ) : 3.9995s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8280s for 8192 events => throughput is 2.14E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.2003s + [COUNTERS] Fortran Overhead ( 0 ) : 4.7147s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4855s for 8192 events => throughput is 1.83E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143122579739E-004) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102372E-004) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914648E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 47.4512s - [COUNTERS] Fortran Overhead ( 0 ) : 5.3002s - [COUNTERS] CudaCpp MEs ( 2 ) : 42.1511s for 90112 events => throughput is 2.14E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451704E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 55.9449s + [COUNTERS] Fortran Overhead ( 0 ) : 6.4916s + [COUNTERS] CudaCpp MEs ( 2 ) : 49.4532s for 90112 events => throughput is 1.82E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532411914648E-004) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451704E-004) differ by less than 3E-14 (5.551115123125783e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.217290e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.878281e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.218509e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.879820e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579739E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 3.5861s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9118s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.6744s for 8192 events => throughput is 4.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0313s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6432s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3881s for 8192 events => throughput is 3.43E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143122579739E-004) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914648E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 21.5443s - [COUNTERS] Fortran Overhead ( 0 ) : 3.2158s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.3285s for 90112 events => throughput is 4.92E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451701E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 30.7303s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4175s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3128s for 90112 events => throughput is 3.42E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532411914648E-004) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451701E-004) differ by less than 3E-14 (6.661338147750939e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.055979e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.595271e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.052931e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.616783e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579728E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.6632s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9608s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7024s for 8192 events => throughput is 1.17E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4110s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3696s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0415s for 8192 events => throughput is 7.87E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143122579728E-004) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914648E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 9.9760s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2687s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.7073s for 90112 events => throughput is 1.17E+04 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 14.5874s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1244s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.4631s for 90112 events => throughput is 7.86E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532411914648E-004) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.207701e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.080696e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.003260e+03 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 2.1884s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2624s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9260s for 8192 events => throughput is 8.85E+03 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 13.3165s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0431s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.2735s for 90112 events => throughput is 8.77E+03 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.044942e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.203855e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.036114e+03 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 2.7894s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5694s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2200s for 8192 events => throughput is 6.71E+03 events/s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 16.8074s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3631s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.4443s for 90112 events => throughput is 6.70E+03 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.788020e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.794108e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579723E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9652s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8508s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1144s for 8192 events => throughput is 7.16E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9054s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8724s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s -*** 
(3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and hip (3.8704143122579723E-004) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914653E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 3.3262s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0704s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2558s for 90112 events => throughput is 7.18E+04 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 3.0251s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6614s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3637s for 90112 events => throughput is 2.48E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and hip (1.5793532411914653E-004) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793438642451712E-004) differ by less than 3E-14 (0.0) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.211429e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.279557e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.433723e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.504315e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.244345e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.123492e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.035029e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.160692e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.238048e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.113945e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.230999e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.155779e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.246450e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.135740e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.390998e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.451591e+05 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 31f43ccc9a..be512c6871 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-15_18:02:31 +DATE: 2024-05-14_21:38:06 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 2.7732s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2741s - [COUNTERS] Fortran MEs ( 1 ) : 2.4991s for 8192 events => throughput is 3.28E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8156s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3706s + [COUNTERS] Fortran MEs ( 1 ) : 4.4449s for 8192 events => throughput is 1.84E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7716s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2746s - [COUNTERS] Fortran MEs ( 1 ) : 2.4970s for 8192 events => throughput is 3.28E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8102s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3614s + [COUNTERS] Fortran MEs ( 1 ) : 4.4489s for 8192 events => throughput is 1.84E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914656E-004] fbridge_mode=0 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 29.0468s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5824s - [COUNTERS] Fortran MEs ( 1 ) : 27.4644s for 90112 events => throughput is 3.28E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 51.1256s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1878s + [COUNTERS] Fortran MEs ( 1 ) : 48.9378s for 90112 events => throughput is 1.84E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704259755238570E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703729438336302E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 7.0056s - [COUNTERS] Fortran Overhead ( 0 ) : 3.5999s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4056s for 8192 events => throughput is 2.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.0331s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6319s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4012s for 8192 events => throughput is 1.86E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704259755238570E-004) differ by less than 4E-4 (3.0134411834747965e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703729438336302E-004) differ by less than 4E-4 (3.021119383106452e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793580182117605E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 42.3763s - [COUNTERS] Fortran Overhead ( 0 ) : 4.9273s - [COUNTERS] CudaCpp MEs ( 2 ) : 37.4490s for 90112 events => throughput is 2.41E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793486626492658E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 55.2131s + [COUNTERS] Fortran Overhead ( 0 ) : 6.4300s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.7831s for 90112 events => throughput is 1.85E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793580182117605E-004) differ by less than 4E-4 (3.024668687290344e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486626492658E-004) differ by less than 4E-4 (3.0382263187522796e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.466583e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.921273e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.476652e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.922301e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704254541054809E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703722581317850E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.9705s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1203s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8502s for 8192 events => throughput is 9.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7702s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5550s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2152s for 8192 events => throughput is 6.74E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704254541054809E-004) differ by less than 4E-4 (2.8787221757475834e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722581317850E-004) differ by less than 4E-4 (2.843951981690296e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793578161882866E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 11.7274s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4156s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.3119s for 90112 events => throughput is 9.68E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793483759856148E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 16.7184s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3394s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.3791s for 90112 events => throughput is 6.74E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793578161882866E-004) differ by less than 4E-4 (2.896753368286653e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483759856148E-004) differ by less than 4E-4 (2.856718252175483e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.935669e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.894177e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.946862e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.934836e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704254166302247E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9852s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6291s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3561s for 8192 events => throughput is 2.30E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4158s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5359s for 8192 events => throughput is 1.53E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704254166302247E-004) differ by less than 4E-4 (2.8690396836061893e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722425602170E-004) differ by less than 4E-4 (2.8399286962077497e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793578009696313E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 5.8306s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9292s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.9014s for 90112 events => throughput is 2.31E+04 events/s + [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 8.5879s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6705s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.9174s for 90112 events => throughput is 1.52E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793578009696313E-004) differ by less than 4E-4 (2.887117363403746e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004) differ by less than 4E-4 (2.852825495613942e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.387091e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.575380e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.571062e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 1.2970s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8246s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4724s for 8192 events => throughput is 1.73E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722425602170E-004) differ by less than 4E-4 (2.8399286962077497e-06) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 7.8247s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6015s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.2232s for 90112 events => throughput is 1.73E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004) differ by less than 4E-4 (2.852825495613942e-06) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.707243e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.366035e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.779309e+04 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703728658657426E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 1.5574s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9553s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6021s for 8192 events => throughput is 1.36E+04 events/s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703728658657426E-004) differ by less than 4E-4 (3.0009745224379714e-06) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793486977281547E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 9.3785s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7424s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.6361s for 90112 events => throughput is 1.36E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486977281547E-004) differ by less than 4E-4 (3.0604373708609245e-06) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.370479e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.379532e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704261630635685E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703736267486325E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.8375s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0563s for 8192 events => throughput is 1.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8882s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8668s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.84E+05 events/s -*** 
(3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and hip (3.8704261630635685E-004) differ by less than 4E-4 (3.0618958697381515e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703736267486325E-004) differ by less than 4E-4 (3.1975667371675343e-06) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793580869662166E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 2.6159s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9951s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6208s for 90112 events => throughput is 1.45E+05 events/s + [XSECTION] Cross section = 0.0001579 [1.5793489323670813E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 2.8658s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6303s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2355s for 90112 events => throughput is 3.83E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and hip (1.5793580869662166E-004) differ by less than 4E-4 (3.0682019858119247e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793489323670813E-004) differ by less than 4E-4 (3.20900471706409e-06) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.470415e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.577461e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.699093e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.937327e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.711466e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.565490e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.298693e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.722014e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.704023e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.574587e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.029112e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.694383e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.702723e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.565279e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.377197e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.537576e+05 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 4ef822ce19..29ba71ed95 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make 
USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-15_18:05:30 +DATE: 2024-05-14_21:41:40 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 2.7628s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2725s - [COUNTERS] Fortran MEs ( 1 ) : 2.4903s for 8192 events => throughput is 3.29E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8133s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s + [COUNTERS] Fortran MEs ( 1 ) : 4.4488s for 8192 events => throughput is 1.84E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7729s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2757s - [COUNTERS] Fortran MEs ( 1 ) : 2.4972s for 8192 events => throughput is 3.28E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8064s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3598s + [COUNTERS] Fortran MEs ( 1 ) : 4.4466s for 8192 events => throughput is 1.84E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914656E-004] fbridge_mode=0 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 29.0273s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5864s - [COUNTERS] Fortran MEs ( 1 ) : 27.4409s for 90112 events => throughput is 3.28E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 51.1214s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1855s + [COUNTERS] Fortran MEs ( 1 ) : 48.9359s for 90112 events => throughput is 1.84E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143272044121E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612659176674E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 7.9143s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0687s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8455s for 8192 events => throughput is 2.13E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.4720s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8531s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.6189s for 8192 events => throughput is 1.77E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143272044121E-004) differ by less than 2E-4 (3.861716058040088e-09) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612659176674E-004) differ by less than 2E-4 (3.851690077993908e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532474032691E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 47.6831s - [COUNTERS] Fortran Overhead ( 0 ) : 5.3827s - [COUNTERS] CudaCpp MEs ( 2 ) : 42.3004s for 90112 events => throughput is 2.13E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438704534934E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 57.5498s + [COUNTERS] Fortran Overhead ( 0 ) : 6.6421s + [COUNTERS] CudaCpp MEs ( 2 ) : 50.9077s for 90112 events => throughput is 1.77E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532474032691E-004) differ by less than 2E-4 (3.933131154099101e-09) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438704534934E-004) differ by less than 2E-4 (3.930950231989527e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.166106e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.826732e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.162276e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.828543e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143304774347E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612692816703E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 3.5538s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8968s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.6570s for 8192 events => throughput is 4.94E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.1811s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7343s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4469s for 8192 events => throughput is 3.35E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143304774347E-004) differ by less than 2E-4 (4.707367828871156e-09) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612692816703E-004) differ by less than 2E-4 (4.720860369289426e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532476698221E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 21.4115s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1899s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.2217s for 90112 events => throughput is 4.95E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438707226035E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 31.4513s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5258s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.9255s for 90112 events => throughput is 3.35E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532476698221E-004) differ by less than 2E-4 (4.101904815811963e-09) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438707226035E-004) differ by less than 2E-4 (4.1013439311399225e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.129046e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.443194e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.113622e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.460068e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143287857844E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.6365s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9462s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6903s for 8192 events => throughput is 1.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4477s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3897s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0581s for 8192 events => throughput is 7.74E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143287857844E-004) differ by less than 2E-4 (4.2702956726259345e-09) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532473043530E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 9.8255s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2612s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.5644s for 90112 events => throughput is 1.19E+04 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 14.7436s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1712s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.5725s for 90112 events => throughput is 7.79E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532473043530E-004) differ by less than 2E-4 (3.870500364655527e-09) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.230886e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.981637e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.008855e+03 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 2.1895s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2554s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9341s for 8192 events => throughput is 8.77E+03 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 13.2415s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0496s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.1919s for 90112 events => throughput is 8.84E+03 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.162017e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.229330e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.164661e+03 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 2.8140s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5816s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2324s for 8192 events => throughput is 6.65E+03 events/s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 16.9564s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3714s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.5850s for 90112 events => throughput is 6.63E+03 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.737200e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.703774e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143124638075E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612512203166E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.8776s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7626s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1149s for 8192 events => throughput is 7.13E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9054s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8725s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.49E+05 events/s -*** 
(3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and hip (3.8704143124638075E-004) differ by less than 2E-4 (5.318190332559425e-11) +OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703612512203166E-004) differ by less than 2E-4 (5.427946980773868e-11) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411887058E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 3.3524s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0921s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2604s for 90112 events => throughput is 7.15E+04 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642387717E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 3.0306s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6671s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3634s for 90112 events => throughput is 2.48E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and hip (1.5793532411887058E-004) differ by less than 2E-4 (1.7474910407599964e-12) +OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793438642387717E-004) differ by less than 2E-4 (4.051980972974434e-12) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.190579e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.282647e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.438917e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.507791e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.245869e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.123606e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.024461e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.148703e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.240706e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.119873e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.227786e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.178988e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.245800e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.119575e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.380658e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.452858e+05 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index a037fad221..834e2f3a3b 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-15_17:49:11 +DATE: 2024-05-14_21:47:42 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 54.5801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4570s - [COUNTERS] Fortran MEs ( 1 ) : 54.1232s for 8192 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 103.3030s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5119s + [COUNTERS] Fortran MEs ( 1 ) : 102.7911s for 8192 events => throughput is 7.97E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 54.4417s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3799s - [COUNTERS] Fortran MEs ( 1 ) : 54.0618s for 8192 events => throughput is 1.52E+02 events/s + [COUNTERS] PROGRAM TOTAL : 103.2514s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5150s + [COUNTERS] Fortran MEs ( 1 ) : 102.7364s for 8192 events => throughput is 7.97E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085419E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 596.9350s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0436s - [COUNTERS] Fortran MEs ( 1 ) : 593.8914s for 90112 events => throughput is 1.52E+02 events/s + [COUNTERS] PROGRAM TOTAL : 1123.8389s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4720s + [COUNTERS] Fortran MEs ( 1 ) : 1119.3668s for 90112 events => throughput is 8.05E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942015001E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 175.8690s - [COUNTERS] Fortran Overhead ( 0 ) : 80.1913s - [COUNTERS] CudaCpp MEs ( 2 ) : 95.6777s for 8192 events => throughput is 8.56E+01 events/s + [COUNTERS] PROGRAM TOTAL : 239.0576s + [COUNTERS] Fortran Overhead ( 0 ) : 109.8826s + [COUNTERS] CudaCpp MEs ( 2 ) : 129.1750s for 8192 events => throughput is 6.34E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403628942015001E-006) differ by less than 3E-14 (2.4424906541753444e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939193E-006) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085453E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1135.3523s - [COUNTERS] Fortran Overhead ( 0 ) : 83.0183s - [COUNTERS] CudaCpp MEs ( 2 ) : 1052.3340s for 90112 events => throughput is 8.56E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1529.6606s + [COUNTERS] Fortran Overhead ( 0 ) : 113.3284s + [COUNTERS] CudaCpp MEs ( 2 ) : 1416.3323s for 90112 events => throughput is 6.36E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783648085453E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.027176e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.491799e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.001904e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.526661e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942015003E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 90.5736s - [COUNTERS] Fortran Overhead ( 0 ) : 41.2684s - [COUNTERS] CudaCpp MEs ( 2 ) : 49.3052s for 8192 events => throughput is 1.66E+02 events/s + [COUNTERS] PROGRAM TOTAL : 115.9147s + [COUNTERS] Fortran Overhead ( 0 ) : 53.4186s + [COUNTERS] CudaCpp MEs ( 2 ) : 62.4961s for 8192 events => throughput is 1.31E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403628942015003E-006) differ by less than 3E-14 (2.6645352591003757e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939197E-006) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085448E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 538.9163s - [COUNTERS] Fortran Overhead ( 0 ) : 40.6550s - [COUNTERS] CudaCpp MEs ( 2 ) : 498.2612s for 90112 events => throughput is 1.81E+02 events/s + [COUNTERS] PROGRAM TOTAL : 746.6810s + [COUNTERS] Fortran Overhead ( 0 ) : 57.3647s + [COUNTERS] CudaCpp MEs ( 2 ) : 689.3163s for 90112 events => throughput is 1.31E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783648085448E-007) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656017E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.251988e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.551185e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.245083e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.557439e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942015001E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 35.5321s - [COUNTERS] Fortran Overhead ( 0 ) : 16.1409s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.3912s for 8192 events => throughput is 4.22E+02 events/s + [COUNTERS] PROGRAM TOTAL : 53.2704s + [COUNTERS] Fortran Overhead ( 0 ) : 24.6321s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.6383s for 8192 events => throughput is 2.86E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403628942015001E-006) differ by less than 3E-14 (2.4424906541753444e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085445E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 233.9561s - [COUNTERS] Fortran Overhead ( 0 ) : 18.6633s - [COUNTERS] CudaCpp MEs ( 2 ) : 215.2929s for 90112 events => throughput is 4.19E+02 events/s + [COUNTERS] PROGRAM TOTAL : 343.5580s + [COUNTERS] Fortran Overhead ( 0 ) : 28.7102s + [COUNTERS] CudaCpp MEs ( 2 ) : 314.8478s for 90112 events => throughput is 2.86E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783648085445E-007) differ by less than 3E-14 (1.1102230246251565e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.255758e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.413864e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.410239e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 48.2891s + [COUNTERS] Fortran Overhead ( 0 ) : 21.8234s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.4657s for 8192 events => throughput is 3.10E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 313.6108s + [COUNTERS] Fortran Overhead ( 0 ) : 25.6541s + [COUNTERS] CudaCpp MEs ( 2 ) : 287.9567s for 90112 events => throughput is 3.13E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.799191e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.252702e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.784281e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 51.5482s + [COUNTERS] Fortran Overhead ( 0 ) : 25.2783s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.2699s for 8192 events => throughput is 3.12E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 319.4275s + [COUNTERS] Fortran Overhead ( 0 ) : 29.3179s + [COUNTERS] CudaCpp MEs ( 2 ) : 290.1096s for 90112 events => throughput is 3.11E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.304770e+02 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.314422e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942015003E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 11.5992s - [COUNTERS] Fortran Overhead ( 0 ) : 7.7374s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8618s for 8192 events => throughput is 2.12E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.2754s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1942s + [COUNTERS] CudaCpp MEs ( 2 ) : 
1.0812s for 8192 events => throughput is 7.58E+03 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and hip (1.2403628942015003E-006) differ by less than 3E-14 (2.6645352591003757e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085437E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 52.7480s - [COUNTERS] Fortran Overhead ( 0 ) : 10.1979s - [COUNTERS] CudaCpp MEs ( 2 ) : 42.5501s for 90112 events => throughput is 2.12E+03 events/s + [COUNTERS] PROGRAM TOTAL : 19.1318s + [COUNTERS] Fortran Overhead ( 0 ) : 7.2127s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9192s for 90112 events => throughput is 7.56E+03 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and hip (2.3322783648085437E-007) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993086656006E-007) differ by less than 3E-14 (1.7763568394002505e-15) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.140157e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.540502e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.190115e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.290286e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.507331e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.317349e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.458846e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.595658e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.517358e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.250349e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.499481e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.495786e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.517207e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.265281e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.115383e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.243097e+03 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 5df6ece655..14284eae7f 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-15_18:44:54 +DATE: 2024-05-14_23:21:26 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 55.3069s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3661s - [COUNTERS] Fortran MEs ( 1 ) : 54.9408s for 8192 events => throughput is 1.49E+02 events/s + [COUNTERS] PROGRAM TOTAL : 103.3301s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5120s + [COUNTERS] Fortran MEs ( 1 ) : 102.8181s for 8192 events => throughput is 7.97E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 54.5783s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3949s - [COUNTERS] Fortran MEs ( 1 ) : 54.1834s for 8192 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 103.1588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5135s + [COUNTERS] Fortran MEs ( 1 ) : 102.6453s for 8192 events => throughput is 7.98E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085419E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 630.0786s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1885s - [COUNTERS] Fortran MEs ( 1 ) : 626.8901s for 90112 events => throughput is 1.44E+02 events/s + [COUNTERS] PROGRAM TOTAL : 1125.1919s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4525s + [COUNTERS] Fortran MEs ( 1 ) : 1120.7394s for 90112 events => throughput is 8.04E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,23 +124,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405363572559468E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405719957040752E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 162.5285s - [COUNTERS] Fortran Overhead ( 0 ) : 74.4601s - [COUNTERS] CudaCpp MEs ( 2 ) : 88.0684s for 8192 events => throughput is 9.30E+01 events/s + [COUNTERS] PROGRAM TOTAL : 212.5627s + [COUNTERS] Fortran Overhead ( 0 ) : 97.5579s + [COUNTERS] CudaCpp MEs ( 2 ) : 115.0048s for 8192 events => throughput is 7.12E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2405363572559468E-006) differ by less than 4E-4 (0.00013984863241267576) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719957040752E-006) differ by less than 4E-4 (0.00013985256106807675) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -158,37 +158,37 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326080615569212E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326290771198648E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1044.5931s - [COUNTERS] Fortran Overhead ( 0 ) : 77.5798s - [COUNTERS] CudaCpp MEs ( 2 ) : 967.0134s for 90112 events => throughput is 9.32E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1367.0989s + [COUNTERS] Fortran Overhead ( 0 ) : 101.8036s + [COUNTERS] CudaCpp MEs ( 2 ) : 1265.2953s for 90112 events => throughput is 7.12E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3326080615569212E-007) differ by less than 4E-4 (0.00014136252059526733) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326290771198648E-007) differ by less than 4E-4 (0.00014139199589124907) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108837e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.563596e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.107427e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.552450e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,23 +202,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405361288903015E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405717007921116E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 39.5468s - [COUNTERS] Fortran Overhead ( 0 ) : 18.1923s - [COUNTERS] CudaCpp MEs ( 2 ) : 21.3545s for 8192 events => throughput is 3.84E+02 events/s + [COUNTERS] PROGRAM TOTAL : 52.5114s + [COUNTERS] Fortran Overhead ( 0 ) : 24.7824s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7291s for 8192 events => throughput is 2.95E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2405361288903015E-006) differ by less than 4E-4 (0.0001396645204514435) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405717007921116E-006) differ by less than 4E-4 (0.00013961480525170877) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -236,37 +236,37 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326076878598447E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326284900828787E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 254.9926s - [COUNTERS] Fortran Overhead ( 0 ) : 20.7416s - [COUNTERS] CudaCpp MEs ( 2 ) : 234.2510s for 90112 events => throughput is 3.85E+02 events/s + [COUNTERS] PROGRAM TOTAL : 336.6135s + [COUNTERS] Fortran Overhead ( 0 ) : 28.7158s + [COUNTERS] CudaCpp MEs ( 2 ) : 307.8977s for 90112 events => throughput is 2.93E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3326076878598447E-007) differ by less than 4E-4 (0.00014120229226155523) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326284900828787E-007) differ by less than 4E-4 (0.00014114029707035236) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.743171e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.337586e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.623247e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.327352e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -280,23 +280,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405360895331841E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 18.1778s - [COUNTERS] Fortran Overhead ( 0 ) : 8.4615s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.7163s for 8192 events => throughput is 8.43E+02 events/s + [COUNTERS] PROGRAM TOTAL : 27.4221s + [COUNTERS] Fortran Overhead ( 0 ) : 12.7945s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.6276s for 8192 events => throughput is 5.60E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2405360895331841E-006) differ by less than 4E-4 (0.00013963279012663143) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716659252656E-006) differ by less than 4E-4 (0.00013958669586155992) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -314,45 +314,195 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326069099562333E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 117.9708s - [COUNTERS] Fortran Overhead ( 0 ) : 10.8812s - [COUNTERS] CudaCpp MEs ( 2 ) : 107.0896s for 90112 events => throughput is 8.41E+02 events/s + [COUNTERS] PROGRAM TOTAL : 178.0327s + [COUNTERS] Fortran Overhead ( 0 ) : 16.8149s + [COUNTERS] CudaCpp MEs ( 2 ) : 161.2178s for 90112 events => throughput is 5.59E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3326069099562333E-007) differ by less than 4E-4 (0.00014086875419705436) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007) differ by less than 4E-4 (0.00014080311959907554) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.035611e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.699081e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.673580e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 24.2879s + [COUNTERS] Fortran Overhead ( 0 ) : 11.2621s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.0258s for 8192 events => throughput is 6.29E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716659252656E-006) differ by less than 4E-4 (0.00013958669586155992) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 159.3803s + [COUNTERS] Fortran Overhead ( 0 ) : 15.2686s + [COUNTERS] CudaCpp MEs ( 2 ) : 144.1117s for 90112 events => throughput is 6.25E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007) differ by less than 4E-4 (0.00014080311959907554) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.640478e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.043163e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.647363e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.241e-06 [1.2405719306052570E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 26.0176s + [COUNTERS] Fortran Overhead ( 0 ) : 12.8723s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.1453s for 8192 events => throughput is 6.23E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719306052570E-006) differ by less than 4E-4 (0.00013980007888836354) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.333e-07 [2.3326283660088769E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 162.3061s + [COUNTERS] Fortran Overhead ( 0 ) : 16.9325s + [COUNTERS] CudaCpp MEs ( 2 ) : 145.3735s for 90112 events => throughput is 6.20E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326283660088769E-007) differ by less than 4E-4 (0.00014108709892313165) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.627762e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.645257e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -364,28 +514,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405363557292459E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405722175509512E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 6.2175s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3549s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8626s for 8192 events => throughput is 4.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5358s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0394s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4964s for 8192 events => throughput is 1.65E+04 events/s 
-*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and hip (1.2405363557292459E-006) differ by less than 4E-4 (0.00013984740156258724) +OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2405722175509512E-006) differ by less than 4E-4 (0.00014003141235829908) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -397,65 +547,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326074784076956E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326296967941821E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 27.3553s - [COUNTERS] Fortran Overhead ( 0 ) : 7.0476s - [COUNTERS] CudaCpp MEs ( 2 ) : 20.3077s for 90112 events => throughput is 4.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 11.4669s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0479s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4190s for 90112 events => throughput is 1.66E+04 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and hip (2.3326074784076956E-007) differ by less than 4E-4 (0.00014111248645076735) +OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3326296967941821E-007) differ by less than 4E-4 (0.0001416576883412901) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.444985e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.635824e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.492484e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.645926e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.263002e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.338441e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.464674e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.336873e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.314870e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.301357e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.016933e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.323702e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.259649e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.316894e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.069920e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.434196e+03 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 10d4f0ce63..4c053bc03a 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-15_19:29:44 +DATE: 2024-05-15_00:31:56 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 54.4825s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s - [COUNTERS] Fortran MEs ( 1 ) : 54.1146s for 8192 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 103.3564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5129s + [COUNTERS] Fortran MEs ( 1 ) : 102.8435s for 8192 events => throughput is 7.97E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 54.3980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3673s - [COUNTERS] Fortran MEs ( 1 ) : 54.0306s for 8192 events => throughput is 1.52E+02 events/s + [COUNTERS] PROGRAM TOTAL : 103.0354s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5118s + [COUNTERS] Fortran MEs ( 1 ) : 102.5236s for 8192 events => throughput is 7.99E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085419E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 597.9891s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0543s - [COUNTERS] Fortran MEs ( 1 ) : 594.9349s for 90112 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 1126.2368s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4377s + [COUNTERS] Fortran MEs ( 1 ) : 1121.7991s for 90112 events => throughput is 8.03E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403629013416990E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 174.8815s - [COUNTERS] Fortran Overhead ( 0 ) : 80.3539s - [COUNTERS] CudaCpp MEs ( 2 ) : 94.5275s for 8192 events => throughput is 8.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 225.4910s + [COUNTERS] Fortran Overhead ( 0 ) : 103.7263s + [COUNTERS] CudaCpp MEs ( 2 ) : 121.7646s for 8192 events => throughput is 6.73E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403629013416990E-006) differ by less than 2E-4 (5.7565425759520394e-09) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985299359844E-006) differ by less than 2E-4 (5.7578810608305275e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783773791503E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1131.1467s - [COUNTERS] Fortran Overhead ( 0 ) : 83.5012s - [COUNTERS] CudaCpp MEs ( 2 ) : 1047.6455s for 90112 events => throughput is 8.60E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1531.3712s + [COUNTERS] Fortran Overhead ( 0 ) : 116.0375s + [COUNTERS] CudaCpp MEs ( 2 ) : 1415.3337s for 90112 events => throughput is 6.37E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783773791503E-007) differ by less than 2E-4 (5.389840573855054e-09) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993212353001E-007) differ by less than 2E-4 (5.389404034161771e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.026996e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.382180e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.025921e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.466526e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403629009850969E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 79.7722s - [COUNTERS] Fortran Overhead ( 0 ) : 35.7094s - [COUNTERS] CudaCpp MEs ( 2 ) : 44.0629s for 8192 events => throughput is 1.86E+02 events/s + [COUNTERS] PROGRAM TOTAL : 120.2262s + [COUNTERS] Fortran Overhead ( 0 ) : 54.8384s + [COUNTERS] CudaCpp MEs ( 2 ) : 65.3878s for 8192 events => throughput is 1.25E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403629009850969E-006) differ by less than 2E-4 (5.469044328521022e-09) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985295828471E-006) differ by less than 2E-4 (5.473184350179849e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783784120318E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 520.7034s - [COUNTERS] Fortran Overhead ( 0 ) : 38.1340s - [COUNTERS] CudaCpp MEs ( 2 ) : 482.5694s for 90112 events => throughput is 1.87E+02 events/s + [COUNTERS] PROGRAM TOTAL : 782.8292s + [COUNTERS] Fortran Overhead ( 0 ) : 58.6898s + [COUNTERS] CudaCpp MEs ( 2 ) : 724.1394s for 90112 events => throughput is 1.24E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783784120318E-007) differ by less than 2E-4 (5.832704319530535e-09) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222645653E-007) differ by less than 2E-4 (5.830713245558172e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.332150e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.516262e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.336891e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.516185e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403629007633195E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 34.3120s - [COUNTERS] Fortran Overhead ( 0 ) : 15.4276s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.8844s for 8192 events => throughput is 4.34E+02 events/s + [COUNTERS] PROGRAM TOTAL : 52.1777s + [COUNTERS] Fortran Overhead ( 0 ) : 24.0873s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.0903s for 8192 events => throughput is 2.92E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403629007633195E-006) differ by less than 2E-4 (5.290244020628165e-09) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783783946155E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 225.6923s - [COUNTERS] Fortran Overhead ( 0 ) : 17.9551s - [COUNTERS] CudaCpp MEs ( 2 ) : 207.7372s for 90112 events => throughput is 4.34E+02 events/s + [COUNTERS] PROGRAM TOTAL : 343.2865s + [COUNTERS] Fortran Overhead ( 0 ) : 27.8199s + [COUNTERS] CudaCpp MEs ( 2 ) : 315.4666s for 90112 events => throughput is 2.86E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783783946155E-007) differ by less than 2E-4 (5.825236737422301e-09) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.503499e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.529786e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.531384e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 45.2330s + [COUNTERS] Fortran Overhead ( 0 ) : 20.3742s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.8588s for 8192 events => throughput is 3.30E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 297.0222s + [COUNTERS] Fortran Overhead ( 0 ) : 24.2760s + [COUNTERS] CudaCpp MEs ( 2 ) : 272.7462s for 90112 events => throughput is 3.30E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.086618e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.531529e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.034848e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 50.6747s + [COUNTERS] Fortran Overhead ( 0 ) : 24.5956s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.0791s for 8192 events => throughput is 3.14E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 317.7787s + [COUNTERS] Fortran Overhead ( 0 ) : 28.6879s + [COUNTERS] CudaCpp MEs ( 2 ) : 289.0908s for 90112 events => throughput is 3.12E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.404102e+02 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.409784e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628931370709E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 12.5547s - [COUNTERS] Fortran Overhead ( 0 ) : 8.2984s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2563s for 8192 events => throughput is 1.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6204s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7568s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.8636s for 8192 events => throughput is 9.49E+03 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and hip (1.2403628931370709E-006) differ by less than 2E-4 (8.581571009358413e-10) +OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985217419736E-006) differ by less than 2E-4 (8.480691704448873e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783640044522E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 57.4092s - [COUNTERS] Fortran Overhead ( 0 ) : 10.6449s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.7643s for 90112 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.2929s + [COUNTERS] Fortran Overhead ( 0 ) : 6.7937s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4992s for 90112 events => throughput is 9.49E+03 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and hip (2.3322783640044522E-007) differ by less than 2E-4 (3.447657714872321e-10) +OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993078576733E-007) differ by less than 2E-4 (3.464063480507207e-10) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.942273e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.473549e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.976894e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.087340e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.293371e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.107601e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.389005e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.158418e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.289349e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.111818e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.258286e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.107191e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.285627e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.111406e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.050475e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.647341e+03 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 7219562cec..759f07dd06 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-15_18:09:11 +DATE: 2024-05-14_21:46:12 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4916s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4435s - [COUNTERS] Fortran MEs ( 1 ) : 0.0482s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4094s + [COUNTERS] Fortran MEs ( 1 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3086s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2604s - [COUNTERS] Fortran MEs ( 1 ) : 0.0482s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4152s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3402s + [COUNTERS] Fortran MEs ( 1 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7352s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2090s - [COUNTERS] Fortran MEs ( 1 ) : 0.5262s for 90112 events => throughput is 1.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4368s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6165s + [COUNTERS] Fortran MEs ( 1 ) : 0.8203s for 90112 events => throughput is 1.10E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4010s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3306s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0703s for 8192 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5011s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4200s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0810s for 8192 events => throughput is 1.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226551166922) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263335) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343820] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686556561293] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0619s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2914s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7705s for 90112 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5731s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6791s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8940s for 90112 events => throughput is 1.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679754343820) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561293) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.207564e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.029302e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.207048e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.033246e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166122] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539351262530] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3332s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 8192 events => throughput is 2.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4255s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 8192 events => throughput is 1.88E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226551166122) differ by less than 3E-14 (2.9531932455029164e-14) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351262530) differ by less than 3E-14 (2.9531932455029164e-14) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6320s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2568s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3753s for 90112 events => throughput is 2.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1141s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6315s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4826s for 90112 events => throughput is 1.87E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679754343823) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561281) differ by less than 3E-14 (6.661338147750939e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.431395e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.900971e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.437646e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.818132e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2970s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2796s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0173s for 8192 events => throughput is 4.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3894s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3642s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0252s for 8192 events => throughput is 3.25E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226551166922) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.4290s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2386s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1904s for 90112 events => throughput is 4.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8967s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6165s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2802s for 90112 events => throughput is 3.22E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679754343823) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.773661e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.238269e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.791928e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.199395e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.3857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3628s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.8679s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6157s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2522s for 90112 events => throughput is 3.57E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.606523e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.588739e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,30 +428,176 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' -Memory access fault by GPU node-4 (Agent handle: 0x63ef3d0) on address 0x14ecb03fa000. Reason: Page not present or supervisor privilege. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x14ef468cb90f in ??? -#1 0x14ef46708d2b in ??? -#2 0x14ef4670a3e4 in ??? -#3 0x14ef3dcd1b64 in ??? -#4 0x14ef3dcceb38 in ??? -#5 0x14ef3dc8c496 in ??? -#6 0x14ef468bf6e9 in ??? -#7 0x14ef467d649e in ??? -#8 0xffffffffffffffff in ??? -./madX.sh: line 377: 76073 Aborted (core dumped) $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. - Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.4110s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3757s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0353s for 8192 events => throughput is 2.32E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0223s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6326s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3897s for 90112 events => throughput is 2.31E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.343272e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.366477e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7764s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7757s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.27110539351263330) and cuda (0.27110539351263363) differ by less than 3E-14 (1.3322676295501878e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0365s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0284s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.11E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686556561304) differ by less than 3E-14 (4.440892098500626e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.543217e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.108201e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.511173e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.528369e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.512246e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.808064e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.513388e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.778138e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index fc013f36c8..286b8a0bd4 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-15_18:09:25 +DATE: 2024-05-14_21:46:42 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.3546s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3065s - [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4843s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4094s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3097s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2615s - [COUNTERS] Fortran MEs ( 1 ) : 0.0482s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4167s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s + [COUNTERS] Fortran MEs ( 1 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7391s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2129s - [COUNTERS] Fortran MEs ( 1 ) : 0.5263s for 90112 events => throughput is 1.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4405s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6199s + [COUNTERS] Fortran MEs ( 1 ) : 0.8207s for 90112 events => throughput is 1.10E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110149549279866] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110463093540638] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3747s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3180s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0568s for 8192 events => throughput is 1.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4146s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0764s for 8192 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110149549279866) differ by less than 4E-4 (2.840326210895583e-06) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110463093540638) differ by less than 4E-4 (2.812844174915341e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510678843355344] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686273216112] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9925s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3638s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6287s for 90112 events => throughput is 1.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5078s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6674s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8404s for 90112 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510678843355344) differ by less than 4E-4 (4.2350520312872675e-08) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686273216112) differ by less than 4E-4 (1.3172298474195543e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.473999e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.092729e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.470734e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.094826e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110146988852984] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110459152958460] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3018s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2817s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 8192 events => throughput is 4.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3939s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3669s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110146988852984) differ by less than 4E-4 (2.934771267448788e-06) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110459152958460) differ by less than 4E-4 (2.9581965829139634e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510676993136629] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510683016166510] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.4615s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2398s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2218s for 90112 events => throughput is 4.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9150s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6174s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2976s for 90112 events => throughput is 3.03E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510676993136629) differ by less than 4E-4 (1.2836447871311663e-07) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510683016166510) differ by less than 4E-4 (1.6458771667782202e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.153768e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.038989e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.171796e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.060486e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110148793566186] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2818s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2721s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 8192 events => throughput is 8.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3649s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3515s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.12E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110148793566186) differ by less than 4E-4 (2.8682018052839098e-06) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460595003461) differ by less than 4E-4 (2.9050052766654844e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510676419088856] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.3330s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2269s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1061s for 90112 events => throughput is 8.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7586s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6103s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1483s for 90112 events => throughput is 6.07E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510676419088856) differ by less than 4E-4 (1.5505111905511626e-07) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ by less than 4E-4 (1.8848637739488083e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.684251e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.085610e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.747759e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.998809e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.3637s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3513s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 8192 events => throughput is 6.60E+05 events/s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460595003461) differ by less than 4E-4 (2.9050052766654844e-06) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.7434s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6053s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1381s for 90112 events => throughput is 6.52E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ by less than 4E-4 (1.8848637739488083e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.613064e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.625420e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,30 +428,178 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' -Memory access fault by GPU node-4 (Agent handle: 0x63ef3f0) on address 0x1484e4b04000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x14887af9890f in ??? -#1 0x14887add5d2b in ??? -#2 0x14887add73e4 in ??? -#3 0x14887239eb64 in ??? -#4 0x14887239bb38 in ??? -#5 0x148872359496 in ??? -#6 0x14887af8c6e9 in ??? -#7 0x14887aea349e in ??? -#8 0xffffffffffffffff in ??? -./madX.sh: line 377: 77263 Aborted $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. - Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110464176080312] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.3755s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0178s for 8192 events => throughput is 4.59E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.27110539351263330) and cpp (0.27110464176080312) differ by less than 4E-4 (2.772913590631809e-06) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510685411522326] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.8107s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6141s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1966s for 90112 events => throughput is 4.58E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.21510686556561295) and cpp (0.21510685411522326) differ by less than 4E-4 (5.3231167917999755e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.752745e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.780214e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110478167944563] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7742s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7737s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.56E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cuda (0.27110478167944563) differ by less than 4E-4 (2.2568093527297606e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510689885789414] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0355s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0291s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.42E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689885789414) differ by less than 4E-4 (1.547708907700951e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.752013e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.369248e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.053096e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.714374e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.107820e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.803718e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.560411e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.935709e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index dd4212fa37..ca0042dad1 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-15_18:09:37 +DATE: 2024-05-14_21:47:11 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.3544s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3063s - [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4096s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3092s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2610s - [COUNTERS] Fortran MEs ( 1 ) : 0.0482s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4164s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3414s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7403s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2134s - [COUNTERS] Fortran MEs ( 1 ) : 0.5269s for 90112 events => throughput is 1.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4386s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6180s + [COUNTERS] Fortran MEs ( 1 ) : 0.8207s for 90112 events => throughput is 1.10E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226549005623] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4014s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3320s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0694s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4992s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4184s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0808s for 8192 events => throughput is 1.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226549005623) differ by less than 2E-4 (7.972267290767832e-11) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539348916002) differ by less than 2E-4 (8.658362915525686e-11) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679758658835] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686560794337] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0549s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2899s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7649s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5686s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6770s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8916s for 90112 events => throughput is 1.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679758658835) differ by less than 2E-4 (2.0059864880295208e-10) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794337) differ by less than 2E-4 (1.967879192932287e-10) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.202899e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.029942e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.204038e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.032493e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226549005628] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3295s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2963s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0332s for 8192 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4253s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3830s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0423s for 8192 events => throughput is 1.93E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226549005628) differ by less than 2E-4 (7.972245086307339e-11) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539348916002) differ by less than 2E-4 (8.658362915525686e-11) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679758658832] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686560794334] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6204s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2552s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3652s for 90112 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1014s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6354s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4660s for 90112 events => throughput is 1.93E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679758658832) differ by less than 2E-4 (2.0059842675834716e-10) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794334) differ by less than 2E-4 (1.9678769724862377e-10) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.464056e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.920832e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.471288e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.926600e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226530029391] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2991s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2816s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3906s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3655s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 8192 events => throughput is 3.26E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226530029391) differ by less than 2E-4 (7.796884249344771e-10) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330272815) differ by less than 2E-4 (7.742566587864985e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679756340242] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.4284s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2366s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1918s for 90112 events => throughput is 4.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8987s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6198s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2789s for 90112 events => throughput is 3.23E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679756340242) differ by less than 2E-4 (9.281064805577444e-11) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.783049e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.244752e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.797269e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.318560e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.3830s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3609s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0220s for 8192 events => throughput is 3.72E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330272815) differ by less than 2E-4 (7.742566587864985e-10) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.8583s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6137s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2446s for 90112 events => throughput is 3.68E+05 events/s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.659431e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.741866e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,30 +428,177 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' -Memory access fault by GPU node-4 (Agent handle: 0x63ef3d0) on address 0x1460b1baa000. Reason: Page not present or supervisor privilege. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x14634807990f in ??? -#1 0x146347eb6d2b in ??? -#2 0x146347eb83e4 in ??? -#3 0x14633f47fb64 in ??? -#4 0x14633f47cb38 in ??? -#5 0x14633f43a496 in ??? -#6 0x14634806d6e9 in ??? -#7 0x146347f8449e in ??? -#8 0xffffffffffffffff in ??? -./madX.sh: line 377: 78444 Aborted $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. 
- Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.4131s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3768s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330272815) differ by less than 2E-4 (7.742566587864985e-10) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0378s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6340s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4039s for 90112 events => throughput is 2.23E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.283252e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.302923e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7774s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7767s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cuda (0.27110539343558537) differ by less than 2E-4 (2.8419910869104115e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0336s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0256s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 90112 events => throughput is 1.13E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686553631395) differ by less than 2E-4 (1.3620671257541517e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.556024e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.988898e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.509950e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.533082e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.514260e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.814949e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.510075e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.782063e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index e5a1b85b65..43740fd349 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-15_20:25:01 +DATE: 2024-05-15_02:05:41 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 1.1089s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0764s - [COUNTERS] Fortran MEs ( 1 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9549s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9051s + [COUNTERS] Fortran MEs ( 1 ) : 0.0497s for 8192 events => throughput is 1.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6289s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5964s - [COUNTERS] Fortran MEs ( 1 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4317s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3821s + [COUNTERS] Fortran MEs ( 1 ) : 0.0496s for 8192 events => throughput is 1.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377502] fbridge_mode=0 + [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.7741s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4191s - [COUNTERS] Fortran MEs ( 1 ) : 0.3550s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8870s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3416s + [COUNTERS] Fortran MEs ( 1 ) : 0.5454s for 90112 events => throughput is 1.65E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256197] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6739s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6384s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0355s for 8192 events => throughput is 2.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4624s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0439s for 8192 events => throughput is 1.86E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955499256197) differ by less than 3E-14 (4.6629367034256575e-15) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256148) differ by less than 3E-14 (6.661338147750939e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.8268s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4394s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3874s for 90112 events => throughput is 2.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8566s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3716s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4850s for 90112 events => throughput is 1.86E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895240377569) differ by less than 3E-14 (3.3306690738754696e-15) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.414149e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.913736e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.396996e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.920291e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256205] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6353s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6159s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0195s for 8192 events => throughput is 4.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5374s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5126s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 8192 events => throughput is 3.31E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955499256205) differ by less than 3E-14 (4.218847493575595e-15) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256161) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.6389s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4246s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2143s for 90112 events => throughput is 4.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6257s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3531s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2727s for 90112 events => throughput is 3.31E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895240377569) differ by less than 3E-14 (3.3306690738754696e-15) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.270507e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.389926e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.282088e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.415683e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256205] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6180s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6072s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4050s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3901s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.47E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955499256205) differ by less than 3E-14 (4.218847493575595e-15) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.5526s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4345s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1181s for 90112 events => throughput is 7.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5072s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3422s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1650s for 90112 events => throughput is 5.46E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895240377564) differ by less than 3E-14 (3.1086244689504383e-15) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.920662e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.625678e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.950346e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.528170e+05 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4043s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3906s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0137s for 8192 events => throughput is 5.99E+05 events/s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 + [UNWEIGHT] Wrote 1818 events (found 1823 events) + [COUNTERS] PROGRAM TOTAL : 1.5003s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3490s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1513s for 90112 events => throughput is 5.96E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.064246e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.053452e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4208s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3980s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 + [UNWEIGHT] Wrote 1818 events (found 1823 events) + [COUNTERS] PROGRAM TOTAL : 1.6097s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3571s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2526s for 90112 events => throughput is 3.57E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.657278e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.678863e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256223] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.9050s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9040s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 8.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8151s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8145s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.36E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and hip (2.0162955499256223) differ by less than 3E-14 (3.3306690738754696e-15) +OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955499256165) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377577] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.6984s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6873s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 90112 events => throughput is 8.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7718s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7649s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.31E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and hip (2.0434895240377577) differ by less than 3E-14 (3.774758283725532e-15) +OK! 
xsec from fortran (2.0434895240377569) and cuda (2.0434895240377573) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.264685e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.973752e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.969946e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.174363e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.540722e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.100865e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.487275e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.773387e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.521984e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.096045e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.481497e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.046124e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.517096e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.122711e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.024626e+07 ) 
sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.717733e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 5e349cad30..8866575613 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-15_20:25:39 +DATE: 2024-05-15_02:06:09 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9477s - [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9537s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9040s + [COUNTERS] Fortran MEs ( 1 ) : 0.0497s for 8192 events => throughput is 1.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6309s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5985s - [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4243s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3746s + [COUNTERS] Fortran MEs ( 1 ) : 0.0497s for 8192 events => throughput is 1.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377502] fbridge_mode=0 + [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.7604s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4051s - [COUNTERS] Fortran MEs ( 1 ) : 0.3553s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8857s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3389s + [COUNTERS] Fortran MEs ( 1 ) : 0.5469s for 90112 events => throughput is 1.65E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,43 +124,43 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162897089316618] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162897371946169] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1625 events) - [COUNTERS] PROGRAM TOTAL : 0.6585s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6278s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0307s for 8192 events => throughput is 2.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4592s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4173s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0419s for 8192 events => throughput is 1.96E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162897089316618) differ by less than 4E-4 (2.8968937452189536e-06) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162897371946169) differ by less than 4E-4 (2.8828764708777044e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! 
-diff /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 6206,6207c6206,6207 -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081269E+01 0.59936081269E+01 0.00000000000E+00 0. -1. -< 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304668E+03 0.48080583909E+03 0.47000000000E+01 0. 1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01 0.59936081260E+01 0.00000000000E+00 0. -1. +< 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304676E+03 0.48080583916E+03 0.47000000000E+01 0. 1. --- -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081269E+01 0.59936081269E+01 0.00000000000E+00 0. 1. -> 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304668E+03 0.48080583909E+03 0.47000000000E+01 0. -1. +> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01 0.59936081260E+01 0.00000000000E+00 0. 1. +> 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304676E+03 0.48080583916E+03 0.47000000000E+01 0. -1. 8306,8307c8306,8307 -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997230E+02 0.23857997230E+02 0.00000000000E+00 0. 1. -< 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496701E+02 0.51504607748E+02 0.47000000000E+01 0. -1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997239E+02 0.23857997239E+02 0.00000000000E+00 0. 1. +< 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496682E+02 0.51504607743E+02 0.47000000000E+01 0. -1. 
--- -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997230E+02 0.23857997230E+02 0.00000000000E+00 0. -1. -> 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496701E+02 0.51504607748E+02 0.47000000000E+01 0. 1. +> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997239E+02 0.23857997239E+02 0.00000000000E+00 0. -1. +> 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496682E+02 0.51504607743E+02 0.47000000000E+01 0. 1. 9606,9619d9605 < 4 1 1E-03 0.1250139E+03 0.7546771E-02 0.1235066E+00 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.94948249861E+03 0.94948249861E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.41149990064E+01 0.41149990064E+01 0.00000000000E+00 0. -1. -< 5 1 1 2 501 0 -0.96459450317E+01 -0.34409175043E+02 0.83136584828E+02 0.90613560351E+02 0.47000000000E+01 0. -1. -< -5 1 1 2 0 501 0.96459450317E+01 0.34409175043E+02 0.86223091477E+03 0.86298393726E+03 0.47000000000E+01 0. 1. +< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.94948250004E+03 0.94948250004E+03 0.00000000000E+00 0. 1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.41149990002E+01 0.41149990002E+01 0.00000000000E+00 0. -1. +< 5 1 1 2 501 0 -0.96459450317E+01 -0.34409175043E+02 0.83136584965E+02 0.90613560477E+02 0.47000000000E+01 0. -1. +< -5 1 1 2 0 501 0.96459450317E+01 0.34409175043E+02 0.86223091608E+03 0.86298393857E+03 0.47000000000E+01 0. 1. 
< < 0 0.12501391E+03 diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index cb821d2765..7a112b526f 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-15_20:25:48 +DATE: 2024-05-15_02:06:15 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9793s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9469s - [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9548s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9052s + [COUNTERS] Fortran MEs ( 1 ) : 0.0496s for 8192 events => throughput is 1.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6302s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5978s - [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4258s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3761s + [COUNTERS] Fortran MEs ( 1 ) : 0.0497s for 8192 events => throughput is 1.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377502] fbridge_mode=0 + [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.7775s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4225s - [COUNTERS] Fortran MEs ( 1 ) : 0.3550s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8849s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3405s + [COUNTERS] Fortran MEs ( 1 ) : 0.5444s for 90112 events => throughput is 1.66E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,23 +124,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955975931003] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6670s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6324s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0346s for 8192 events => throughput is 2.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4626s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 8192 events => throughput is 1.86E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955975931003) differ by less than 2E-4 (2.364111328923002e-08) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955975930954) differ by less than 2E-4 (2.3641117063988304e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -158,23 +158,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895706383704] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.8220s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4411s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3808s for 90112 events => throughput is 2.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8596s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3731s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4865s for 90112 events => throughput is 1.85E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895706383704) differ by less than 2E-4 (2.2804433230660948e-08) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895706383660) differ by less than 2E-4 (2.2804427679545825e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -182,15 +182,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.395962e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.780391e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.379165e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.791166e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,23 +204,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955975931007] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6312s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6126s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0187s for 8192 events => throughput is 4.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4249s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4000s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.30E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955975931007) differ by less than 2E-4 (2.3641113511274625e-08) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955975930958) differ by less than 2E-4 (2.364111728603291e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -238,23 +238,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895706383709] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.6210s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4150s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2059s for 90112 events => throughput is 4.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6282s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3511s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2771s for 90112 events => throughput is 3.25E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895706383709) differ by less than 2E-4 (2.2804433452705553e-08) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895706383669) differ by less than 2E-4 (2.2804428123635034e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -262,15 +262,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.433005e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.181686e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.453282e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.208038e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,23 +284,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955953691122] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6190s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6084s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4056s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3902s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.30E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955953691122) differ by less than 2E-4 (2.2538106270175717e-08) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953691082) differ by less than 2E-4 (2.253811048902321e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -318,23 +318,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895701243891] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.5386s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4207s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1179s for 90112 events => throughput is 7.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5122s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3434s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1688s for 90112 events => throughput is 5.34E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895701243891) differ by less than 2E-4 (2.255291176034291e-08) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701243878) differ by less than 2E-4 (2.255290776354002e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -342,23 +342,177 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.909911e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.673120e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.737995e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4033s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3893s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.88E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953691082) differ by less than 2E-4 (2.253811048902321e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 + [UNWEIGHT] Wrote 1818 events (found 1823 events) + [COUNTERS] PROGRAM TOTAL : 1.4998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3458s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1539s for 90112 events => throughput is 5.85E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701243878) differ by less than 2E-4 (2.255290776354002e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.111638e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.967731e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.181877e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4235s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3998s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.46E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953691082) differ by less than 2E-4 (2.253811048902321e-08) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 + [UNWEIGHT] Wrote 1818 events (found 1823 events) + [COUNTERS] PROGRAM TOTAL : 1.6230s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3623s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2607s for 90112 events => throughput is 3.46E+05 events/s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701243878) differ by less than 2E-4 (2.255290776354002e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.284728e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.311060e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,28 +524,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955503257880] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8865s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8854s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0011s for 8192 events => throughput is 7.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8168s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8162s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.39E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and hip (2.0162955503257880) differ by less than 2E-4 (1.9846257970357328e-10) +OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955503257827) differ by less than 2E-4 (1.9846613241725208e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -403,65 +557,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895242795763] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.7094s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6983s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 90112 events => throughput is 8.11E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7735s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7667s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.32E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and hip (2.0434895242795763) differ by less than 2E-4 (1.1833978241782006e-10) +OK! 
xsec from fortran (2.0434895240377569) and cuda (2.0434895242795732) differ by less than 2E-4 (1.183348974365117e-10) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.209592e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.993711e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.960419e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.200906e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.541807e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.104870e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.490798e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.706347e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.541143e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.113512e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.479881e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.043454e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.505237e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.115682e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.033287e+07 ) 
sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.706517e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 660bc06eac..363e482fee 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-15_20:26:52 +DATE: 2024-05-15_02:07:14 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 1.7225s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3366s - [COUNTERS] Fortran MEs ( 1 ) : 1.3858s for 8192 events => throughput is 5.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7125s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3468s + [COUNTERS] Fortran MEs ( 1 ) : 2.3657s for 8192 events => throughput is 3.46E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.6429s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2567s - [COUNTERS] Fortran MEs ( 1 ) : 1.3862s for 8192 events => throughput is 5.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7089s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3433s + [COUNTERS] Fortran MEs ( 1 ) : 2.3656s for 8192 events => throughput is 3.46E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.5858s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3354s - [COUNTERS] Fortran MEs ( 1 ) : 15.2503s for 90112 events => throughput is 5.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.8837s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8377s + [COUNTERS] Fortran MEs ( 1 ) : 26.0460s for 90112 events => throughput is 3.46E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084349E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 4.3367s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3009s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.0358s for 8192 events => throughput is 4.02E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.3314s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7867s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5447s for 8192 events => throughput is 3.22E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896697955084349E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 25.7402s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3444s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.3958s for 90112 events => throughput is 4.02E+03 events/s + [COUNTERS] PROGRAM TOTAL : 32.2519s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2639s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.9881s for 90112 events => throughput is 3.22E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668083551438230E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438187E-007) differ by less than 3E-14 (5.551115123125783e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.107270e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.358191e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.064003e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.362821e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084317E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.0924s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1907s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9017s for 8192 events => throughput is 9.08E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.9787s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6481s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3306s for 8192 events => throughput is 6.16E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896697955084317E-007) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084412E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438272E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 12.1607s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2304s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.9302s for 90112 events => throughput is 9.07E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.7399s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1202s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.6196s for 90112 events => throughput is 6.16E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668083551438272E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438230E-007) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.284013e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.391673e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.297424e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.406849e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084359E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.0703s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6717s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3985s for 8192 events => throughput is 2.06E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.5110s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9206s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5903s for 8192 events => throughput is 1.39E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896697955084359E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 6.0923s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7291s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3632s for 90112 events => throughput is 2.07E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.9045s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3985s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.5060s for 90112 events => throughput is 1.39E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668083551438261E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.120327e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.429892e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.429297e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 1.3737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8523s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5214s for 8192 events => throughput is 1.57E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 8.0618s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3228s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7390s for 90112 events => throughput is 1.57E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.631894e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.118513e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.630679e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 1.7374s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0390s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6983s for 8192 events => throughput is 1.17E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 10.1862s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5119s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.6743s for 90112 events => throughput is 1.17E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.188749e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.192286e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084359E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8518s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8019s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0499s for 8192 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8347s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8176s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.78E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and hip (7.9896697955084359E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438272E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.3012s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7529s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5484s for 90112 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4695s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2802s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1893s for 90112 events => throughput is 4.76E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and hip (7.6668083551438272E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.671302e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.844936e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.658395e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.235132e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.213705e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 9.142804e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.753070e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.412161e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.214795e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.141114e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.223896e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.415015e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.210300e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.148097e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.134102e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.763971e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index e510c954ab..8c90f7d65e 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-15_20:28:46 +DATE: 2024-05-15_02:09:53 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 1.6441s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2590s - [COUNTERS] Fortran MEs ( 1 ) : 1.3851s for 8192 events => throughput is 5.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7125s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3447s + [COUNTERS] Fortran MEs ( 1 ) : 2.3677s for 8192 events => throughput is 3.46E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.6534s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2614s - [COUNTERS] Fortran MEs ( 1 ) : 1.3921s for 8192 events => throughput is 5.88E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7098s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3423s + [COUNTERS] Fortran MEs ( 1 ) : 2.3675s for 8192 events => throughput is 3.46E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.6000s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3362s - [COUNTERS] Fortran MEs ( 1 ) : 15.2638s for 90112 events => throughput is 5.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.8796s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8397s + [COUNTERS] Fortran MEs ( 1 ) : 26.0399s for 90112 events => throughput is 3.46E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896781657409323E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896785213255034E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 3.9769s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0958s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8811s for 8192 events => throughput is 4.35E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.1560s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7117s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4442s for 8192 events => throughput is 3.35E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896781657409323E-007) differ by less than 4E-4 (1.0476318432761644e-06) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896785213255034E-007) differ by less than 4E-4 (1.0921373827521563e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668135917139758E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668138359550833E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 23.8633s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1760s - [COUNTERS] CudaCpp MEs ( 2 ) : 20.6873s for 90112 events => throughput is 4.36E+03 events/s + [COUNTERS] PROGRAM TOTAL : 31.0848s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1980s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.8868s for 90112 events => throughput is 3.35E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668135917139758E-007) differ by less than 4E-4 (6.830182661676787e-07) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668138359550833E-007) differ by less than 4E-4 (7.148752136920677e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.500510e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.468936e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.505177e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.470461e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896768403674554E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.2268s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7483s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4785s for 8192 events => throughput is 1.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.7104s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0198s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6906s for 8192 events => throughput is 1.19E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896768403674554E-007) differ by less than 4E-4 (8.817459542509454e-07) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896766542858863E-007) differ by less than 4E-4 (8.584556829838164e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668124704275563E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.0749s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8123s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.2627s for 90112 events => throughput is 1.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 10.0856s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4949s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.5907s for 90112 events => throughput is 1.19E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668124704275563E-007) differ by less than 4E-4 (5.367662185840061e-07) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668121906848987E-007) differ by less than 4E-4 (5.002787206720427e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.746537e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.208989e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.753023e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.217159e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896764767194441E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.6877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2059s for 8192 events => throughput is 3.98E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9411s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6390s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3021s for 8192 events => throughput is 2.71E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896764767194441E-007) differ by less than 4E-4 (8.362311807452727e-07) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896764408326359E-007) differ by less than 4E-4 (8.31739528805997e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668127564727394E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 3.8088s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5452s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2636s for 90112 events => throughput is 3.98E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.4424s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1114s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.3310s for 90112 events => throughput is 2.71E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668127564727394E-007) differ by less than 4E-4 (5.740757704764121e-07) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007) differ by less than 4E-4 (5.380134884269694e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.111165e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.798166e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.773514e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 0.8751s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6051s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2700s for 8192 events => throughput is 3.03E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896764408326359E-007) differ by less than 4E-4 (8.31739528805997e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 5.0437s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0874s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.9562s for 90112 events => throughput is 3.05E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007) differ by less than 4E-4 (5.380134884269694e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.174347e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.110104e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.165933e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 1.0418s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6907s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3511s for 8192 events => throughput is 2.33E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896778056937195E-007) differ by less than 4E-4 (1.0025677505964836e-06) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 6.0003s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1658s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.8345s for 90112 events => throughput is 2.35E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668139178203571E-007) differ by less than 4E-4 (7.255530953820255e-07) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.388563e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.384278e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896777191982386E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896805369365078E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.6564s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6304s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 8192 events => throughput is 3.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8239s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8100s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.88E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and hip (7.9896777191982386E-007) differ by less than 4E-4 (9.917418370974929e-07) +OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896805369365078E-007) differ by less than 4E-4 (1.3444145174901223e-06) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668141007936531E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668194616292154E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 1.9498s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6655s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2843s for 90112 events => throughput is 3.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4314s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2776s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1538s for 90112 events => throughput is 5.86E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and hip (7.6668141007936531E-007) differ by less than 4E-4 (7.494187359569082e-07) +OK! 
xsec from fortran (7.6668083551438230E-007) and cuda (7.6668194616292154E-007) differ by less than 4E-4 (1.4486452351025747e-06) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.236838e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.225876e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.235081e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.504759e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.176113e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.365193e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.011828e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.375280e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.174356e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.353719e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.963341e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.385176e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.170684e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.374018e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.676487e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.819946e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 39374e1be7..c4e3a5b9c2 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-15_20:30:24 +DATE: 2024-05-15_02:12:03 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 1.6769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2915s - [COUNTERS] Fortran MEs ( 1 ) : 1.3854s for 8192 events => throughput is 5.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7093s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s + [COUNTERS] Fortran MEs ( 1 ) : 2.3655s for 8192 events => throughput is 3.46E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.6679s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2823s - [COUNTERS] Fortran MEs ( 1 ) : 1.3856s for 8192 events => throughput is 5.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3415s + [COUNTERS] Fortran MEs ( 1 ) : 2.3664s for 8192 events => throughput is 3.46E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.5784s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3357s - [COUNTERS] Fortran MEs ( 1 ) : 15.2427s for 90112 events => throughput is 5.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.8728s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8352s + [COUNTERS] Fortran MEs ( 1 ) : 26.0376s for 90112 events => throughput is 3.46E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696375074458E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 4.3391s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2879s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.0512s for 8192 events => throughput is 3.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.3771s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8109s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5662s for 8192 events => throughput is 3.19E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896696375074458E-007) differ by less than 2E-4 (1.9775659776399834e-08) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696375074447E-007) differ by less than 2E-4 (1.9775660775600556e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668081976882384E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.0217s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3613s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.6604s for 90112 events => throughput is 3.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 32.7261s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2892s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.4369s for 90112 events => throughput is 3.17E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668081976882384E-007) differ by less than 2E-4 (2.0537305855938826e-08) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081976882373E-007) differ by less than 2E-4 (2.0537305522871918e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.091466e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.328150e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.087797e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.339265e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696285825699E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.0554s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1572s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8982s for 8192 events => throughput is 9.12E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.9285s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6200s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3084s for 8192 events => throughput is 6.26E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896696285825699E-007) differ by less than 2E-4 (2.0892711671827158e-08) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696285825688E-007) differ by less than 2E-4 (2.089271267102788e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668081890954439E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 12.1084s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2445s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.8639s for 90112 events => throughput is 9.14E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.4683s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0870s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.3813s for 90112 events => throughput is 6.27E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668081890954439E-007) differ by less than 2E-4 (2.1658084325970606e-08) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081890954375E-007) differ by less than 2E-4 (2.1658084770059816e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.341538e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.594909e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.375998e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.618255e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,8 +276,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,13 +285,13 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.0608s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6632s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3976s for 8192 events => throughput is 2.06E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.5026s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9158s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5869s for 8192 events => throughput is 1.40E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.912112224111695e-08) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,8 +309,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,35 +318,181 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 6.0854s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7321s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3533s for 90112 events => throughput is 2.07E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.8525s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3961s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.4564s for 90112 events => throughput is 1.40E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004716071297e-08) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.116124e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.440340e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.445000e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 1.3614s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8452s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5162s for 8192 events => throughput is 1.59E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 7.9985s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3165s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.6819s for 90112 events => throughput is 1.59E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.641936e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.120997e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.647430e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 1.7613s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0489s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7124s for 8192 events => throughput is 1.15E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 10.3619s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5262s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.8357s for 90112 events => throughput is 1.15E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.168837e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.170011e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697918297697E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.7505s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7005s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0500s for 8192 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8333s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8161s + [COUNTERS] CudaCpp MEs ( 
2 ) : 0.0172s for 8192 events => throughput is 4.77E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and hip (7.9896697918297697E-007) differ by less than 2E-4 (4.604279180142612e-10) +OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697918297644E-007) differ by less than 2E-4 (4.6042958334879813e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551547613E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.2767s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7276s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5491s for 90112 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4751s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2851s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1899s for 90112 events => throughput is 4.74E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and hip (7.6668083551547613E-007) differ by less than 2E-4 (1.426192497433476e-12) +OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551547592E-007) differ by less than 2E-4 (1.4264145420384011e-12) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.665614e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.824575e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.657467e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.192856e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.209272e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.152345e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.748491e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.384307e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.206625e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.153285e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.213329e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.379355e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.205557e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.102798e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.132251e+05 ) sec^-1 +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.746210e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index be43c59a71..7ab99ff224 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-15_20:26:40 +DATE: 2024-05-15_02:07:00 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4893s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4835s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4152s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4058s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.2460s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2402s - [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3215s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3121s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.72E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.4762 [0.47620722822826017] fbridge_mode=0 + [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.0929s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0305s - [COUNTERS] Fortran MEs ( 1 ) : 0.0624s for 90112 events => throughput is 1.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3987s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2968s + [COUNTERS] Fortran MEs ( 1 ) : 0.1019s for 90112 events => throughput is 8.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,15 +124,15 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] ERROR! 
No cross section in log file: - /tmp/valassia/output_susyggt1t1_x1_cudacpp + /tmp/avalassi/output_susyggt1t1_x1_cudacpp ... xqcutij # 3> 0.0 0.0 RESET CUMULATIVE VARIABLE diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 3b008415cf..409bad5208 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-15_20:26:44 +DATE: 2024-05-15_02:07:05 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.3149s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3091s - [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4157s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.2480s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2421s - [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3201s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3107s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.4762 [0.47620722822826017] fbridge_mode=0 + [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.1174s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0550s - [COUNTERS] Fortran MEs ( 1 ) : 0.0624s for 90112 events => throughput is 1.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.4023s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2985s + [COUNTERS] Fortran MEs ( 1 ) : 0.1038s for 90112 events => throughput is 8.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,15 +124,15 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] ERROR! 
No cross section in log file: - /tmp/valassia/output_susyggt1t1_x1_cudacpp + /tmp/avalassi/output_susyggt1t1_x1_cudacpp ... xqcutij # 3> 0.0 0.0 RESET CUMULATIVE VARIABLE diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 41f5f3e87b..ab75757909 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-15_20:26:48 +DATE: 2024-05-15_02:07:09 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.3111s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3053s - [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4151s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4057s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.70E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.2484s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2426s - [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3196s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3102s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.70E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.4762 [0.47620722822826017] fbridge_mode=0 + [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.0995s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0371s - [COUNTERS] Fortran MEs ( 1 ) : 0.0624s for 90112 events => throughput is 1.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.4054s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3024s + [COUNTERS] Fortran MEs ( 1 ) : 0.1030s for 90112 events => throughput is 8.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,15 +124,15 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] ERROR! 
No cross section in log file: - /tmp/valassia/output_susyggt1t1_x1_cudacpp + /tmp/avalassi/output_susyggt1t1_x1_cudacpp ... xqcutij # 3> 0.0 0.0 RESET CUMULATIVE VARIABLE diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 70943a347c..7f72709307 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-15_20:26:25 +DATE: 2024-05-15_02:06:42 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.6804s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6518s - [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8319s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7874s + [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3179s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2894s - [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4226s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3781s + [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=0 + [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3369s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0254s - [COUNTERS] Fortran MEs ( 1 ) : 0.3115s for 90112 events => throughput is 2.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8349s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3481s + [COUNTERS] Fortran MEs ( 1 ) : 0.4868s for 90112 events => throughput is 1.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,19 +124,19 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 171.8 [171.81273026311092] fbridge_mode=1 + [XSECTION] Cross section = 171.8 [171.81273026311101] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.5519s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5198s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0322s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7087s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6687s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (44.598860065419849) and cpp (171.81273026311092) differ by more than 3E-14 (2.8524018329411867) +ERROR! 
xsec from fortran (44.598860065419856) and cpp (171.81273026311101) differ by more than 3E-14 (2.852401832941188) diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 425a8480ec..a4e4ae047f 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-15_20:26:30 +DATE: 2024-05-15_02:06:48 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.5880s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5595s - [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8388s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7944s + [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3208s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2922s - [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4222s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3777s + [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=0 + [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3417s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0294s - [COUNTERS] Fortran MEs ( 1 ) : 0.3122s for 90112 events => throughput is 2.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8423s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3553s + [COUNTERS] Fortran MEs ( 1 ) : 0.4870s for 90112 events => throughput is 1.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,19 +124,19 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 171.8 [171.81269679287095] fbridge_mode=1 + [XSECTION] Cross section = 171.8 [171.81270286137041] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.5181s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4904s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0276s for 8192 events => throughput is 2.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7160s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6786s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0374s for 8192 events => throughput is 2.19E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (44.598860065419849) and cpp (171.81269679287095) differ by more than 4E-4 (2.8524010824681945) +ERROR! 
xsec from fortran (44.598860065419856) and cpp (171.81270286137041) differ by more than 4E-4 (2.8524012185366816) diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 1003afbf74..ff6733802e 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-15_20:26:35 +DATE: 2024-05-15_02:06:54 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.5896s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5608s - [COUNTERS] Fortran MEs ( 1 ) : 0.0288s for 8192 events => throughput is 2.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8341s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7896s + [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3195s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2910s - [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4214s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3769s + [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=0 + [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3423s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0301s - [COUNTERS] Fortran MEs ( 1 ) : 0.3122s for 90112 events => throughput is 2.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8373s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3500s + [COUNTERS] Fortran MEs ( 1 ) : 0.4873s for 90112 events => throughput is 1.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,19 +124,19 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 171.8 [171.81273490068895] fbridge_mode=1 + [XSECTION] Cross section = 171.8 [171.81273490068889] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.5275s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4943s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0331s for 8192 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7153s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6748s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (44.598860065419849) and cpp (171.81273490068895) differ by more than 2E-4 (2.8524019369254145) +ERROR! 
xsec from fortran (44.598860065419856) and cpp (171.81273490068889) differ by more than 2E-4 (2.8524019369254128) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 4bcfaca743..013110c0dd 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_15:53:31 +DATE: 2024-05-15_08:50:09 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.335614e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.106638e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.335347e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.505437 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.805405e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.952080e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.190913e+08 ) sec^-1 +MeanMatrixElemValue = 
( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.672339 sec INFO: No Floating Point Exceptions have been reported - 1,351,686,749 cycles:u # 2.532 GHz (73.05%) - 2,234,086 stalled-cycles-frontend:u # 0.17% frontend cycles idle (73.19%) - 5,889,218 stalled-cycles-backend:u # 0.44% backend cycles idle (74.89%) - 2,060,192,671 instructions:u # 1.52 insn per cycle - # 0.00 stalled cycles per insn (75.94%) - 0.578165423 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest.exe + 2,542,567,494 cycles # 2.815 GHz + 3,941,097,411 instructions # 1.55 insn per cycle + 0.969294589 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.240241e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.415912e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.415912e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.805075 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.054726e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.237655e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.237655e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.385002 sec INFO: No Floating Point Exceptions have been reported - 19,600,879,999 cycles:u # 3.372 GHz (74.93%) - 50,975,682 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.02%) - 61,790,125 stalled-cycles-backend:u # 0.32% backend cycles idle (75.01%) - 46,990,521,493 instructions:u # 2.40 insn per cycle - # 0.00 stalled cycles per insn (74.96%) - 5.820541450 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe + 18,269,202,165 cycles # 2.859 GHz + 43,967,069,972 instructions # 2.41 insn per cycle + 6.390334100 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.927481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.425900e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.425900e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
3.992293 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.552394e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.025542e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.025542e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.476521 sec INFO: No Floating Point Exceptions have been reported - 13,244,208,331 cycles:u # 3.309 GHz (75.01%) - 51,613,494 stalled-cycles-frontend:u # 0.39% frontend cycles idle (75.02%) - 1,582,031,183 stalled-cycles-backend:u # 11.95% backend cycles idle (75.02%) - 31,158,458,120 instructions:u # 2.35 insn per cycle - # 0.05 stalled cycles per insn (75.02%) - 4.006583677 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe + 12,728,052,416 cycles # 2.841 GHz + 30,998,789,943 instructions # 2.44 insn per cycle + 4.481749609 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.643596e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.529991e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.529991e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
3.106384 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.855020e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.546950e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.546950e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.813175 sec +INFO: No Floating Point Exceptions have been reported + 10,046,089,223 cycles # 2.632 GHz + 19,366,150,955 instructions # 1.93 insn per cycle + 3.818350781 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.996112e+06 
) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.810288e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.810288e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.569526 sec INFO: No Floating Point Exceptions have been reported - 10,162,252,294 cycles:u # 3.260 GHz (74.93%) - 48,687,080 stalled-cycles-frontend:u # 0.48% frontend cycles idle (75.06%) - 385,968,837 stalled-cycles-backend:u # 3.80% backend cycles idle (75.11%) - 19,353,357,901 instructions:u # 1.90 insn per cycle - # 0.02 stalled cycles per insn (75.11%) - 3.121076222 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe + 9,728,622,739 cycles # 2.723 GHz + 18,978,342,140 instructions # 1.95 insn per cycle + 3.574740866 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.669755e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.196947e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.196947e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.188754 sec +INFO: No Floating 
Point Exceptions have been reported + 8,562,533,600 cycles # 2.042 GHz + 15,730,003,481 instructions # 1.84 insn per cycle + 4.193992786 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index bbb46d465f..cf13a69eb9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,182 +1,231 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_16:23:42 +DATE: 2024-05-15_09:17:38 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.504140e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.315586e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.315586e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.526875 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.463666e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.527720e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.527720e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.331144 sec INFO: No Floating Point Exceptions have been reported - 18,387,040,799 cycles:u # 3.306 GHz (74.99%) - 219,803,785 stalled-cycles-frontend:u # 1.20% frontend cycles idle (75.01%) - 6,863,615,759 stalled-cycles-backend:u # 37.33% backend cycles idle (74.99%) - 17,261,403,497 instructions:u # 0.94 insn per cycle - # 0.40 stalled cycles per insn (74.97%) - 5.586710375 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest.exe + 7,322,660,282 cycles # 2.842 GHz + 13,188,655,592 instructions # 1.80 insn per cycle + 2.634504246 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.226101e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.395316e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.395316e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.960017 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.018307e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.187576e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.187576e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.804531 sec INFO: No Floating Point Exceptions have been reported - 19,972,737,786 cycles:u # 3.340 GHz (74.99%) - 53,101,436 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.99%) - 110,369,686 stalled-cycles-backend:u # 0.55% backend cycles idle (74.99%) - 47,264,809,914 instructions:u # 2.37 insn per cycle - # 0.00 stalled cycles per insn (74.99%) - 5.984037268 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe + 19,459,627,277 cycles # 2.858 GHz + 
44,193,955,664 instructions # 2.27 insn per cycle + 6.811255280 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.873295e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.344627e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.344627e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.220034 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.482019e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.908717e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.908717e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.879908 sec INFO: No Floating Point Exceptions have been reported - 13,886,084,795 cycles:u # 3.276 GHz (74.90%) - 54,329,570 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.94%) - 1,624,045,452 stalled-cycles-backend:u # 11.70% backend cycles idle (75.03%) - 31,905,363,866 instructions:u # 2.30 insn per cycle - # 0.05 stalled cycles per insn (75.09%) - 4.243404225 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe + 13,957,313,163 cycles # 2.857 GHz + 31,844,057,421 instructions # 2.28 insn per cycle + 4.886813449 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.545906e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.342715e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.342715e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.341813 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.812993e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.461899e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.461899e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.110424 sec INFO: No Floating Point Exceptions have been reported - 10,768,607,979 cycles:u # 3.204 GHz (75.03%) - 51,098,851 stalled-cycles-frontend:u # 0.47% frontend cycles idle (75.01%) - 386,779,720 stalled-cycles-backend:u # 3.59% backend cycles idle (74.90%) - 20,765,206,333 instructions:u # 1.93 insn per cycle - # 0.02 stalled cycles per insn (74.90%) - 3.364940030 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe + 11,257,664,295 cycles # 2.735 GHz + 20,727,911,170 instructions # 1.84 insn per cycle + 4.117109353 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.869279e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.573803e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.573803e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.004022 sec +INFO: No Floating Point Exceptions have been reported + 10,948,992,779 cycles # 2.731 GHz + 20,338,868,359 instructions # 1.86 insn per cycle + 4.010773587 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.576100e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.035998e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.035998e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.638455 sec +INFO: No Floating Point Exceptions have been reported + 9,862,529,153 cycles # 2.123 GHz + 16,873,171,812 instructions # 1.71 insn per cycle + 4.645277576 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 079279b6ae..e8daf97743 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_16:30:49 +DATE: 2024-05-15_09:28:43 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.261896e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.101320e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.330734e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.531146e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.607248e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.140926e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.650614 sec 
+TOTAL : 1.379056 sec INFO: No Floating Point Exceptions have been reported - 15,390,822,100 cycles:u # 3.291 GHz (74.96%) - 154,767,236 stalled-cycles-frontend:u # 1.01% frontend cycles idle (74.96%) - 6,896,164,043 stalled-cycles-backend:u # 44.81% backend cycles idle (75.04%) - 11,598,353,813 instructions:u # 0.75 insn per cycle - # 0.59 stalled cycles per insn (75.02%) - 4.701228878 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest.exe + 4,577,843,489 cycles # 2.841 GHz + 7,096,304,812 instructions # 1.55 insn per cycle + 1.668143278 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc 
--all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.241924e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.415026e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.415026e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.053954e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.236082e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236082e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.784111 sec +TOTAL : 6.760962 sec INFO: No Floating Point Exceptions have been reported - 19,582,424,359 cycles:u # 3.380 GHz (75.01%) - 52,626,328 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.01%) - 62,580,532 stalled-cycles-backend:u # 0.32% backend cycles idle (75.01%) - 47,002,468,034 instructions:u # 2.40 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 5.794589010 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe + 19,360,467,507 cycles # 2.862 GHz + 44,070,598,018 instructions # 2.28 insn per cycle + 6.766561857 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.924408e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.423249e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.423249e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.564029e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = 
( 2.045025e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.045025e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.991413 sec +TOTAL : 4.813937 sec INFO: No Floating Point Exceptions have been reported - 13,266,186,109 cycles:u # 3.315 GHz (75.01%) - 52,004,169 stalled-cycles-frontend:u # 0.39% frontend cycles idle (75.01%) - 1,576,241,201 stalled-cycles-backend:u # 11.88% backend cycles idle (75.01%) - 31,161,679,732 instructions:u # 2.35 insn per cycle - # 0.05 stalled cycles per insn (75.01%) - 4.003012510 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe + 13,789,517,550 cycles # 2.862 GHz + 31,001,132,263 instructions # 2.25 insn per cycle + 4.819327883 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.657137e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.541237e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.541237e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.923166e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.678985e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.678985e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.064292 sec +INFO: No Floating Point Exceptions have been reported + 11,129,699,703 cycles # 2.736 GHz + 19,270,255,649 instructions # 1.73 insn per cycle + 4.069782670 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.996543e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.814111e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.814111e+06 ) sec^-1 
MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.092372 sec +TOTAL : 3.949101 sec INFO: No Floating Point Exceptions have been reported - 10,117,398,066 cycles:u # 3.262 GHz (74.99%) - 49,699,237 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.98%) - 408,670,593 stalled-cycles-backend:u # 4.04% backend cycles idle (74.98%) - 19,310,695,694 instructions:u # 1.91 insn per cycle - # 0.02 stalled cycles per insn (74.98%) - 3.103403972 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe + 10,841,394,049 cycles # 2.742 GHz + 18,690,520,828 instructions # 1.72 insn per cycle + 3.954645583 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.670627e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.195421e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.195421e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.567011 sec +INFO: No 
Floating Point Exceptions have been reported + 9,663,502,451 cycles # 2.114 GHz + 15,431,673,204 instructions # 1.60 insn per cycle + 4.572422830 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 2e6e4bd7b0..19a199da15 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,176 +1,220 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand 
+HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_16:28:39 +DATE: 2024-05-15_09:23:15 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.475890e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.078088e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.308113e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.374542 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.839913e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.559208e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.040532e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.943278 sec INFO: No Floating Point Exceptions have been reported - 17,912,998,427 cycles:u # 3.319 GHz (74.96%) - 220,020,020 stalled-cycles-frontend:u # 1.23% frontend cycles idle (74.94%) - 6,834,525,299 stalled-cycles-backend:u # 38.15% backend cycles idle (74.97%) - 16,837,830,655 instructions:u # 0.94 insn per cycle - # 0.41 stalled cycles per insn (75.01%) - 5.425169821 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest.exe + 6,181,845,855 cycles # 2.840 GHz + 11,415,068,692 instructions # 1.85 insn per cycle + 2.232241621 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.240698e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.414463e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.414463e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.790648 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.053691e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.235890e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235890e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.391359 sec INFO: No Floating Point Exceptions have been reported - 19,615,250,530 cycles:u # 3.382 GHz (74.95%) - 53,090,533 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.02%) - 60,472,852 stalled-cycles-backend:u # 0.31% backend cycles idle (75.03%) - 46,945,742,677 instructions:u # 2.39 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 5.801228335 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe + 18,284,615,829 cycles # 2.859 GHz + 43,966,934,303 instructions # 2.40 insn per cycle + 6.396729569 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.932274e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.434949e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.434949e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.976944 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.548538e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.027318e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027318e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.492815 sec INFO: No Floating Point Exceptions have been reported - 13,228,523,302 cycles:u # 3.319 GHz (74.91%) - 52,456,504 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.92%) - 1,573,426,456 stalled-cycles-backend:u # 11.89% backend cycles idle (74.92%) - 31,201,132,149 instructions:u # 2.36 insn per cycle - # 0.05 stalled cycles per insn (74.97%) - 3.987491718 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe + 12,799,194,167 cycles # 2.850 GHz + 31,002,998,440 instructions # 2.42 insn per cycle + 4.498423848 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.649310e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.519301e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.519301e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 
) GeV^0 -TOTAL : 3.107454 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.927818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.684883e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.684883e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.684126 sec +INFO: No Floating Point Exceptions have been reported + 10,059,075,308 cycles # 2.727 GHz + 19,365,321,698 instructions # 1.93 insn per cycle + 3.689518031 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 2.005912e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.823770e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.823770e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.554785 sec INFO: No Floating Point Exceptions have been reported - 10,115,652,915 cycles:u # 3.247 GHz (74.88%) - 50,306,144 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.90%) - 372,482,324 stalled-cycles-backend:u # 3.68% backend cycles idle (74.92%) - 19,309,824,678 instructions:u # 1.91 insn per cycle - # 0.02 stalled cycles per insn (75.03%) - 3.120959363 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe + 9,714,767,873 cycles # 2.730 GHz + 18,977,049,698 instructions # 1.95 insn per cycle + 3.560177942 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.671731e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.199906e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.199906e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.183865 sec +INFO: No 
Floating Point Exceptions have been reported + 8,579,840,171 cycles # 2.048 GHz + 15,727,472,225 instructions # 1.83 insn per cycle + 4.189226380 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 6a5d94131d..33c7f18330 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_15:53:50 +DATE: 2024-05-15_08:50:40 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.893824e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.586353e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.907783e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.481272 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.810458e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.974947e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.234210e+08 ) sec^-1 +MeanMatrixElemValue = 
( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.664919 sec INFO: No Floating Point Exceptions have been reported - 1,265,529,795 cycles:u # 2.501 GHz (74.81%) - 2,351,281 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.92%) - 5,913,872 stalled-cycles-backend:u # 0.47% backend cycles idle (74.84%) - 2,092,479,663 instructions:u # 1.65 insn per cycle - # 0.00 stalled cycles per insn (74.88%) - 0.532629891 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/runTest.exe + 2,531,875,227 cycles # 2.826 GHz + 3,924,651,160 instructions # 1.55 insn per cycle + 0.957718090 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165216E-002 -Relative difference = 1.0277079305077159e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.323490e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.523841e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.523841e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.478000 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.106178e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.308715e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.308715e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.107328 sec INFO: No Floating Point Exceptions have been reported - 18,489,189,686 cycles:u # 3.368 GHz (74.94%) - 53,032,744 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.94%) - 829,160,138 stalled-cycles-backend:u # 4.48% backend cycles idle (74.94%) - 44,803,064,031 instructions:u # 2.42 insn per cycle - # 0.02 stalled cycles per insn (75.01%) - 5.492850186 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 486) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe + 17,473,849,627 cycles # 2.859 GHz + 41,813,289,157 instructions # 2.39 insn per cycle + 6.112590469 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.979258e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.524530e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.524530e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
3.908576 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.602402e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.113473e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.113473e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.350705 sec INFO: No Floating Point Exceptions have been reported - 13,000,447,782 cycles:u # 3.317 GHz (74.92%) - 55,760,917 stalled-cycles-frontend:u # 0.43% frontend cycles idle (74.90%) - 1,631,053,608 stalled-cycles-backend:u # 12.55% backend cycles idle (74.92%) - 30,005,481,698 instructions:u # 2.31 insn per cycle - # 0.05 stalled cycles per insn (75.02%) - 3.934209277 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1569) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe + 12,445,011,688 cycles # 2.858 GHz + 30,161,192,299 instructions # 2.42 insn per cycle + 4.355874746 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1612) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.605362e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.438599e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.438599e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
3.137948 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.933515e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.700181e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.700181e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.673811 sec +INFO: No Floating Point Exceptions have been reported + 9,974,481,462 cycles # 2.712 GHz + 19,097,432,832 instructions # 1.91 insn per cycle + 3.679029945 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1931) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.017924e+06 
) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.864367e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.864367e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.534233 sec INFO: No Floating Point Exceptions have been reported - 10,337,671,349 cycles:u # 3.283 GHz (74.86%) - 53,779,157 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.89%) - 332,090,936 stalled-cycles-backend:u # 3.21% backend cycles idle (75.02%) - 18,952,579,151 instructions:u # 1.83 insn per cycle - # 0.02 stalled cycles per insn (75.10%) - 3.152056886 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1902) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe + 9,647,224,526 cycles # 2.726 GHz + 18,756,987,578 instructions # 1.94 insn per cycle + 3.539409278 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1662) (512y: 178) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.715481e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.272578e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.272578e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.089098 sec +INFO: No Floating Point Exceptions have been reported + 8,410,617,370 cycles # 2.055 GHz + 15,604,171,588 
instructions # 1.86 insn per cycle + 4.094301669 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 887) (512y: 156) (512z: 1239) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 77b1a6a7a9..33fed0551a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_16:11:59 +DATE: 2024-05-15_09:08:08 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.335138e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.114607e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.343263e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.492323 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.628819e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.711705e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.169619e+08 ) sec^-1 +MeanMatrixElemValue = 
( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.687102 sec INFO: No Floating Point Exceptions have been reported - 1,312,289,309 cycles:u # 2.539 GHz (74.68%) - 2,303,283 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.68%) - 5,720,150 stalled-cycles-backend:u # 0.44% backend cycles idle (75.59%) - 2,095,079,145 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (74.41%) - 0.543465242 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/runTest.exe + 2,611,517,864 cycles # 2.817 GHz + 4,097,091,457 instructions # 1.57 insn per cycle + 0.983136730 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.790759e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.180659e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.180659e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.236145 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.574373e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.016662e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.016662e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.417758 sec INFO: No Floating Point Exceptions have been reported - 14,126,513,604 cycles:u # 3.327 GHz (74.95%) - 52,243,783 stalled-cycles-frontend:u # 0.37% frontend cycles idle (74.94%) - 2,149,229,322 stalled-cycles-backend:u # 15.21% backend cycles idle (74.95%) - 36,817,782,552 instructions:u # 2.61 insn per cycle - # 0.06 stalled cycles per insn (74.95%) - 4.249944382 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe + 12,636,637,133 cycles # 2.858 GHz + 32,511,207,265 instructions # 2.57 insn per cycle + 4.423102265 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.407363e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.241561e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.241561e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
3.333377 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.003415e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.868406e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.868406e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.564130 sec INFO: No Floating Point Exceptions have been reported - 10,975,506,316 cycles:u # 3.283 GHz (74.89%) - 52,565,684 stalled-cycles-frontend:u # 0.48% frontend cycles idle (74.92%) - 978,456,899 stalled-cycles-backend:u # 8.91% backend cycles idle (75.04%) - 24,693,614,092 instructions:u # 2.25 insn per cycle - # 0.04 stalled cycles per insn (75.12%) - 3.347285273 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2334) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe + 10,200,579,028 cycles # 2.858 GHz + 24,474,078,033 instructions # 2.40 insn per cycle + 3.569608882 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.027607e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.213793e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.213793e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
2.800855 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.159366e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.171580e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.171580e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.334345 sec +INFO: No Floating Point Exceptions have been reported + 9,137,150,683 cycles # 2.737 GHz + 16,925,063,810 instructions # 1.85 insn per cycle + 3.339844561 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.217918e+06 
) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.276168e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.276168e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.257138 sec INFO: No Floating Point Exceptions have been reported - 9,080,636,256 cycles:u # 3.230 GHz (74.98%) - 49,418,393 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.96%) - 106,768,871 stalled-cycles-backend:u # 1.18% backend cycles idle (74.96%) - 16,891,831,755 instructions:u # 1.86 insn per cycle - # 0.01 stalled cycles per insn (74.97%) - 2.815004548 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1604) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe + 8,913,058,262 cycles # 2.733 GHz + 16,333,083,963 instructions # 1.83 insn per cycle + 3.262669724 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.841334e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.527766e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.527766e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.844554 sec +INFO: No Floating 
Point Exceptions have been reported + 7,947,309,723 cycles # 2.070 GHz + 14,588,089,470 instructions # 1.84 insn per cycle + 3.850353922 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index a7320ea899..40e4c2710d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand 
-Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_16:12:14 +DATE: 2024-05-15_09:08:35 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.927470e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.583386e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.904218e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.493515 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.672056e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.715961e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.200378e+08 ) sec^-1 +MeanMatrixElemValue = 
( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.688059 sec INFO: No Floating Point Exceptions have been reported - 1,269,039,139 cycles:u # 2.526 GHz (74.60%) - 2,146,758 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.86%) - 5,981,286 stalled-cycles-backend:u # 0.47% backend cycles idle (74.86%) - 1,987,275,013 instructions:u # 1.57 insn per cycle - # 0.00 stalled cycles per insn (74.31%) - 0.544769433 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/runTest.exe + 2,628,811,652 cycles # 2.824 GHz + 4,098,061,770 instructions # 1.56 insn per cycle + 0.989964852 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165216E-002 -Relative difference = 1.0277079305077159e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.441579e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.231307e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.231307e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.298601 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.088773e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.951304e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.951304e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.432390 sec INFO: No Floating Point Exceptions have been reported - 10,867,783,594 cycles:u # 3.284 GHz (74.87%) - 52,146,672 stalled-cycles-frontend:u # 0.48% frontend cycles idle (74.88%) - 50,308,407 stalled-cycles-backend:u # 0.46% backend cycles idle (75.00%) - 28,361,487,036 instructions:u # 2.61 insn per cycle - # 0.00 stalled cycles per insn (75.10%) - 3.312935168 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe + 9,816,761,508 cycles # 2.856 GHz + 25,389,812,172 instructions # 2.59 insn per cycle + 3.437900655 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.645291e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.690258e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.690258e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
3.098793 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.323639e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.570997e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.570997e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.131480 sec INFO: No Floating Point Exceptions have been reported - 10,143,082,072 cycles:u # 3.263 GHz (75.04%) - 52,117,387 stalled-cycles-frontend:u # 0.51% frontend cycles idle (75.04%) - 57,943,710 stalled-cycles-backend:u # 0.57% backend cycles idle (75.04%) - 21,538,603,189 instructions:u # 2.12 insn per cycle - # 0.00 stalled cycles per insn (75.04%) - 3.112605117 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2117) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe + 8,960,455,508 cycles # 2.857 GHz + 21,483,342,086 instructions # 2.40 insn per cycle + 3.137065961 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.299536e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.779460e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.779460e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
2.628612 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.323393e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.523515e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.523515e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.129809 sec +INFO: No Floating Point Exceptions have been reported + 8,590,805,157 cycles # 2.741 GHz + 15,810,902,744 instructions # 1.84 insn per cycle + 3.135401923 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.371590e+06 
) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.617449e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617449e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.072734 sec INFO: No Floating Point Exceptions have been reported - 8,508,384,505 cycles:u # 3.225 GHz (74.85%) - 48,598,729 stalled-cycles-frontend:u # 0.57% frontend cycles idle (74.85%) - 67,466,002 stalled-cycles-backend:u # 0.79% backend cycles idle (74.95%) - 15,817,480,134 instructions:u # 1.86 insn per cycle - # 0.00 stalled cycles per insn (75.11%) - 2.642397740 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1497) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe + 8,426,985,572 cycles # 2.738 GHz + 15,515,147,883 instructions # 1.84 insn per cycle + 3.078306295 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165084E-002 -Relative difference = 1.0277089582483854e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.994914e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.793395e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.793395e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.576498 sec +INFO: No Floating 
Point Exceptions have been reported + 7,555,837,029 cycles # 2.111 GHz + 14,284,009,962 instructions # 1.89 insn per cycle + 3.582086451 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 14881e21bf..8013a0a990 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand 
-Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_15:54:07 +DATE: 2024-05-15_08:51:10 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.883918e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.052029e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.769187e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 -TOTAL : 0.374784 sec +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.595984e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.320772e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 2.301811e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.570737 sec INFO: No Floating Point Exceptions have been reported - 960,879,210 cycles:u # 2.421 GHz (72.75%) - 2,175,480 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.37%) - 5,482,826 stalled-cycles-backend:u # 0.57% backend cycles idle (75.75%) - 1,732,976,834 instructions:u # 1.80 insn per cycle - # 0.00 stalled cycles per insn (75.91%) - 0.424663664 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest.exe + 2,246,486,544 cycles # 2.823 GHz + 3,506,634,931 instructions # 1.56 insn per cycle + 0.852338149 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.423076e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.652500e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.652500e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 5.093272 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.083847e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.285456e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.285456e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.179178 sec INFO: No Floating Point Exceptions have been reported - 17,225,516,490 cycles:u # 3.378 GHz (74.92%) - 40,407,321 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.93%) - 34,038,801 stalled-cycles-backend:u # 0.20% backend cycles idle (75.00%) - 47,163,828,021 instructions:u # 2.74 insn per cycle - # 0.00 stalled cycles per insn (75.07%) - 5.104691057 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe + 17,746,031,356 cycles # 2.870 GHz + 43,508,011,437 instructions # 2.45 insn per cycle + 6.184344065 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.926509e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.144859e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
4.144859e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.819466 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.238662e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.428787e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.428787e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.189594 sec INFO: No Floating Point Exceptions have been reported - 9,257,870,999 cycles:u # 3.275 GHz (74.83%) - 41,348,671 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.94%) - 979,441,310 stalled-cycles-backend:u # 10.58% backend cycles idle (75.08%) - 22,116,917,006 instructions:u # 2.39 insn per cycle - # 0.04 stalled cycles per insn (75.10%) - 2.833251666 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe + 9,235,498,127 cycles # 2.892 GHz + 21,907,480,657 instructions # 2.37 insn per cycle + 3.194460576 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.427502e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.028137e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
5.028137e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.505394 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.409366e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.687254e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.687254e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.980854 sec INFO: No Floating Point Exceptions have been reported - 8,135,282,485 cycles:u # 3.238 GHz (74.88%) - 42,554,369 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.70%) - 1,560,275,251 stalled-cycles-backend:u # 19.18% backend cycles idle (74.83%) - 15,520,980,931 instructions:u # 1.91 insn per cycle - # 0.10 stalled cycles per insn (75.14%) - 2.516660422 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe + 8,292,293,761 cycles # 2.778 GHz + 15,591,054,832 instructions # 1.88 insn per cycle + 2.986051095 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.428922e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.741361e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.741361e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.960614 sec +INFO: No Floating 
Point Exceptions have been reported + 8,224,129,141 cycles # 2.775 GHz + 15,429,529,767 instructions # 1.88 insn per cycle + 2.965545528 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.377921e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.593574e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.593574e+06 ) sec^-1 +MeanMatrixElemValue = 
( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.020695 sec +INFO: No Floating Point Exceptions have been reported + 6,668,305,047 cycles # 2.205 GHz + 12,864,023,469 instructions # 1.93 insn per cycle + 3.025878438 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 9830bd7b4e..3c9816664d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,182 +1,231 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_16:24:05 +DATE: 2024-05-15_09:18:13 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.599503e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.306620e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.306620e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 -TOTAL : 5.345325 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.924255e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.413592e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.413592e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.746384 sec INFO: No Floating Point Exceptions have been reported - 17,776,352,111 cycles:u # 3.308 GHz (74.99%) - 119,250,397 stalled-cycles-frontend:u # 0.67% frontend cycles idle (75.00%) - 6,849,788,457 stalled-cycles-backend:u # 38.53% backend cycles idle (74.99%) - 17,105,198,140 instructions:u # 0.96 insn per cycle - # 0.40 stalled cycles per insn (74.99%) - 5.402378584 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest.exe + 5,610,656,254 cycles # 2.840 GHz + 10,225,693,233 instructions # 1.82 insn per cycle + 2.033039131 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.412731e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.638373e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.638373e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 5.185885 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.059133e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.253480e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.253480e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.420919 sec INFO: No Floating Point Exceptions have been reported - 17,446,020,529 cycles:u # 3.356 GHz (74.97%) - 39,837,031 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.93%) - 78,482,274 stalled-cycles-backend:u # 0.45% backend cycles idle (74.92%) - 47,339,513,994 instructions:u # 2.71 insn per cycle - # 0.00 stalled cycles per insn (74.97%) - 5.202106716 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe + 18,381,343,915 cycles # 2.860 GHz + 
43,656,614,838 instructions # 2.38 insn per cycle + 6.427111291 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.868967e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.038264e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.038264e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.950420 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.109916e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.152195e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.152195e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.495245 sec INFO: No Floating Point Exceptions have been reported - 9,628,487,073 cycles:u # 3.250 GHz (74.72%) - 42,300,894 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.77%) - 988,623,281 stalled-cycles-backend:u # 10.27% backend cycles idle (74.96%) - 23,280,735,842 instructions:u # 2.42 insn per cycle - # 0.04 stalled cycles per insn (75.10%) - 2.966852201 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe + 9,996,777,274 cycles # 2.856 GHz + 23,243,853,177 instructions # 2.33 insn per cycle + 3.501517806 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.333903e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.830919e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.830919e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.635881 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.267470e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.372677e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.372677e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.282249 sec INFO: No Floating Point Exceptions have been reported - 8,486,150,079 cycles:u # 3.204 GHz (74.99%) - 42,761,185 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.94%) - 1,569,530,324 stalled-cycles-backend:u # 18.50% backend cycles idle (74.79%) - 16,678,247,062 instructions:u # 1.97 insn per cycle - # 0.09 stalled cycles per insn (74.79%) - 2.652887678 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe + 8,999,301,502 cycles # 2.737 GHz + 16,711,367,029 instructions # 1.86 insn per cycle + 3.288600293 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.300798e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.453366e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.453366e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.237585 sec +INFO: No Floating Point Exceptions have been reported + 8,910,454,271 cycles # 2.748 GHz + 16,548,951,381 instructions # 1.86 insn per cycle + 3.243990670 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.242765e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.310475e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.310475e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.319641 sec +INFO: No Floating Point Exceptions have been reported + 7,375,675,607 cycles # 2.218 GHz + 14,070,638,355 instructions # 1.91 insn per cycle + 3.325979475 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index b6c599a5f7..86b18beeca 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_16:31:12 +DATE: 2024-05-15_09:29:16 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.831561e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.040681e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.755363e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371895e-02 +- 3.272985e-06 ) GeV^0 -TOTAL : 4.535946 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.316146e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.184478e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.273286e+09 ) sec^-1 
+MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 +TOTAL : 1.208576 sec INFO: No Floating Point Exceptions have been reported - 15,029,031,453 cycles:u # 3.296 GHz (74.97%) - 53,464,566 stalled-cycles-frontend:u # 0.36% frontend cycles idle (75.00%) - 6,828,705,816 stalled-cycles-backend:u # 45.44% backend cycles idle (74.98%) - 11,270,019,199 instructions:u # 0.75 insn per cycle - # 0.61 stalled cycles per insn (75.08%) - 4.585299662 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest.exe + 4,060,487,484 cycles # 2.838 GHz + 6,545,680,426 instructions # 1.61 insn per cycle + 1.488008057 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` 
= 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.422756e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.651467e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.651467e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.079034e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.280414e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280414e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 5.093908 sec +TOTAL : 6.551180 sec INFO: No Floating Point Exceptions have been reported - 17,247,361,054 cycles:u # 3.382 GHz (74.83%) - 40,283,734 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.91%) - 37,286,054 stalled-cycles-backend:u # 0.22% backend cycles idle (75.06%) - 47,177,568,793 instructions:u # 2.74 insn per cycle - # 0.00 stalled cycles per insn (75.06%) - 5.101990445 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe + 18,753,021,194 cycles # 2.861 GHz + 43,690,910,218 instructions # 2.33 insn per cycle + 6.556344298 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.917329e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.131879e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.131879e+06 ) sec^-1 +OMP threads / `nproc --all` 
= 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.211903e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.382753e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.382753e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.823566 sec +TOTAL : 3.572564 sec INFO: No Floating Point Exceptions have been reported - 9,262,737,833 cycles:u # 3.273 GHz (74.85%) - 41,466,075 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.84%) - 987,134,435 stalled-cycles-backend:u # 10.66% backend cycles idle (74.95%) - 22,124,170,980 instructions:u # 2.39 insn per cycle - # 0.04 stalled cycles per insn (75.10%) - 2.831331995 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe + 10,230,948,768 cycles # 2.860 GHz + 21,990,392,157 instructions # 2.15 insn per cycle + 3.577833075 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.427477e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.023879e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.023879e+06 ) sec^-1 +OMP threads / `nproc --all` = 
1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.375026e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.621400e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.621400e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.365598 sec +INFO: No Floating Point Exceptions have been reported + 9,260,613,801 cycles # 2.748 GHz + 15,502,462,040 instructions # 1.67 insn per cycle + 3.370856404 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.399053e+06 ) sec^-1 +EvtsPerSec[MatrixElems] 
(3) = ( 3.676729e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.676729e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.500193 sec +TOTAL : 3.348363 sec INFO: No Floating Point Exceptions have been reported - 8,158,770,517 cycles:u # 3.255 GHz (74.80%) - 42,309,026 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.92%) - 1,542,393,291 stalled-cycles-backend:u # 18.90% backend cycles idle (75.06%) - 15,482,682,988 instructions:u # 1.90 insn per cycle - # 0.10 stalled cycles per insn (75.11%) - 2.508023648 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe + 9,213,003,046 cycles # 2.748 GHz + 15,140,470,998 instructions # 1.64 insn per cycle + 3.353630138 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.357732e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.568234e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.568234e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.398550 sec +INFO: No 
Floating Point Exceptions have been reported + 7,618,469,719 cycles # 2.239 GHz + 12,576,308,405 instructions # 1.65 insn per cycle + 3.403870283 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index fe6c7848ed..0b2b54a7e3 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,176 +1,220 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand 
+HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_16:29:01 +DATE: 2024-05-15_09:23:47 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.340281e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.884809e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.568324e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 -TOTAL : 5.258637 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.759670e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.145239e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143418e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.524875 sec INFO: No Floating Point Exceptions have been reported - 17,563,321,942 cycles:u # 3.321 GHz (74.96%) - 119,585,531 stalled-cycles-frontend:u # 0.68% frontend cycles idle (75.04%) - 6,784,796,342 stalled-cycles-backend:u # 38.63% backend cycles idle (75.05%) - 16,735,211,123 instructions:u # 0.95 insn per cycle - # 0.41 stalled cycles per insn (75.05%) - 5.310494029 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest.exe + 4,989,316,542 cycles # 2.839 GHz + 9,082,846,394 instructions # 1.82 insn per cycle + 1.813458953 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.426108e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.655329e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.655329e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 5.087357 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.080048e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.281576e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.281576e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.203506 sec INFO: No Floating Point Exceptions have been reported - 17,204,587,803 cycles:u # 3.377 GHz (74.91%) - 39,355,756 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.98%) - 31,548,116 stalled-cycles-backend:u # 0.18% backend cycles idle (75.03%) - 47,153,071,449 instructions:u # 2.74 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 5.096757669 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe + 17,757,530,244 cycles # 2.861 GHz + 43,508,006,160 instructions # 2.45 insn per cycle + 6.208727967 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / 
`nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.923910e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.140468e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.140468e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.823848 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.206812e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.375863e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.375863e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.237434 sec INFO: No Floating Point Exceptions have been reported - 9,247,785,083 cycles:u # 3.267 GHz (74.85%) - 40,833,202 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.86%) - 986,336,744 stalled-cycles-backend:u # 10.67% backend cycles idle (74.91%) - 22,151,028,754 instructions:u # 2.40 insn per cycle - # 0.04 stalled cycles per insn (75.04%) - 2.832758864 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe + 9,259,434,414 cycles # 2.856 GHz + 21,908,038,508 instructions # 2.37 insn per cycle + 3.242611568 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.378898e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.959224e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.959224e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.539209 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.382009e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.633872e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.633872e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.016863 sec +INFO: No Floating Point Exceptions have been reported + 8,280,779,950 cycles # 2.741 GHz + 15,591,245,776 instructions # 1.88 insn per cycle + 3.022171201 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv 
= VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.404263e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.681230e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.681230e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.994546 sec INFO: No Floating Point Exceptions have been reported - 8,186,960,394 cycles:u # 3.224 GHz (74.83%) - 42,758,994 stalled-cycles-frontend:u # 0.52% frontend cycles idle (75.12%) - 1,565,874,225 stalled-cycles-backend:u # 19.13% backend cycles idle (74.99%) - 15,488,647,093 instructions:u # 1.89 insn per cycle - # 0.10 stalled cycles per insn (75.03%) - 2.548349202 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe + 8,214,677,647 cycles # 2.739 GHz + 15,434,585,139 instructions # 1.88 insn per cycle + 2.999808256 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.362407e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.574940e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.574940e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.043437 sec +INFO: No 
Floating Point Exceptions have been reported + 6,615,257,438 cycles # 2.171 GHz + 12,865,082,178 instructions # 1.94 insn per cycle + 3.048740210 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index cba6a5b401..995450060c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_15:54:22 +DATE: 2024-05-15_08:51:36 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.908413e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.092641e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.820730e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 -TOTAL : 0.374505 sec +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.597215e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.332061e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 2.336412e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.570131 sec INFO: No Floating Point Exceptions have been reported - 979,760,804 cycles:u # 2.467 GHz (74.38%) - 2,343,962 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.18%) - 4,434,800 stalled-cycles-backend:u # 0.45% backend cycles idle (74.38%) - 1,767,673,609 instructions:u # 1.80 insn per cycle - # 0.00 stalled cycles per insn (75.82%) - 0.424530447 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/runTest.exe + 2,262,800,220 cycles # 2.852 GHz + 3,529,650,865 instructions # 1.56 insn per cycle + 0.850224222 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.547638e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.821232e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.821232e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.736828 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.165199e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.400279e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.400279e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 5.768753 sec INFO: No Floating Point Exceptions have been reported - 15,985,012,934 cycles:u # 3.370 GHz (74.91%) - 39,859,736 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.98%) - 27,486,688 stalled-cycles-backend:u # 0.17% backend cycles idle (75.04%) - 43,996,654,019 instructions:u # 2.75 insn per cycle - # 0.00 stalled cycles per insn (75.04%) - 4.748066178 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 467) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe + 16,702,300,100 cycles # 2.893 GHz + 41,266,069,530 instructions # 2.47 insn per cycle + 5.773788428 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.009133e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.324751e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
4.324751e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.759400 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.303137e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.575063e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.575063e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.111032 sec INFO: No Floating Point Exceptions have been reported - 9,044,703,094 cycles:u # 3.269 GHz (74.79%) - 41,357,970 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.86%) - 883,301,243 stalled-cycles-backend:u # 9.77% backend cycles idle (74.95%) - 21,623,711,510 instructions:u # 2.39 insn per cycle - # 0.04 stalled cycles per insn (75.08%) - 2.770793180 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1827) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe + 9,008,169,469 cycles # 2.892 GHz + 21,210,986,670 instructions # 2.35 insn per cycle + 3.116090380 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1843) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.475002e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.128091e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
5.128091e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.479050 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.392977e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.662301e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.662301e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.001977 sec INFO: No Floating Point Exceptions have been reported - 8,035,208,858 cycles:u # 3.232 GHz (74.94%) - 42,761,530 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.91%) - 1,619,377,735 stalled-cycles-backend:u # 20.15% backend cycles idle (74.90%) - 15,353,591,850 instructions:u # 1.91 insn per cycle - # 0.11 stalled cycles per insn (74.91%) - 2.490290485 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2542) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe + 8,314,602,138 cycles # 2.766 GHz + 15,425,291,626 instructions # 1.86 insn per cycle + 3.007014639 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2537) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.476383e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.842526e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.842526e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.911880 sec +INFO: No Floating 
Point Exceptions have been reported + 8,105,425,569 cycles # 2.780 GHz + 15,232,647,489 instructions # 1.88 insn per cycle + 2.916884995 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.416684e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.676680e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.676680e+06 ) sec^-1 +MeanMatrixElemValue = 
( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.977366 sec +INFO: No Floating Point Exceptions have been reported + 6,591,431,955 cycles # 2.211 GHz + 12,844,185,156 instructions # 1.95 insn per cycle + 2.982248175 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1706) (512y: 18) (512z: 1427) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052564145764E-002 +Relative difference = 1.9988585667912256e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 6bf576ec95..0b9b4485ca 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_16:12:28 +DATE: 2024-05-15_09:09:00 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.882092e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.058393e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.778143e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 -TOTAL : 0.372693 sec +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.300754e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188837e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 2.289601e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.584304 sec INFO: No Floating Point Exceptions have been reported - 985,461,621 cycles:u # 2.501 GHz (73.78%) - 2,165,098 stalled-cycles-frontend:u # 0.22% frontend cycles idle (73.93%) - 4,268,655 stalled-cycles-backend:u # 0.43% backend cycles idle (76.05%) - 1,822,873,609 instructions:u # 1.85 insn per cycle - # 0.00 stalled cycles per insn (76.40%) - 0.420386075 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/runTest.exe + 2,283,875,556 cycles # 2.814 GHz + 3,582,969,727 instructions # 1.57 insn per cycle + 0.868214226 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.936599e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.385861e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.385861e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 3.919551 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.599189e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.083386e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.083386e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 4.311622 sec INFO: No Floating Point Exceptions have been reported - 13,101,935,021 cycles:u # 3.337 GHz (74.94%) - 40,078,820 stalled-cycles-frontend:u # 0.31% frontend cycles idle (74.94%) - 1,941,296,293 stalled-cycles-backend:u # 14.82% backend cycles idle (74.95%) - 38,012,714,824 instructions:u # 2.90 insn per cycle - # 0.05 stalled cycles per insn (75.03%) - 3.930459501 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 833) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe + 12,169,175,832 cycles # 2.820 GHz + 32,427,435,629 instructions # 2.66 insn per cycle + 4.316983262 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039543819614E-002 -Relative difference = 3.5561191488957804e-08 +Avg ME (F77/C++) = 1.2828039840314887E-002 +Relative difference = 1.244813035273009e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.521499e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.447162e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
5.447162e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.454107 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.616768e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.420514e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.420514e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.789822 sec INFO: No Floating Point Exceptions have been reported - 7,947,464,831 cycles:u # 3.229 GHz (74.98%) - 41,781,985 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.97%) - 830,048,875 stalled-cycles-backend:u # 10.44% backend cycles idle (74.97%) - 18,665,388,472 instructions:u # 2.35 insn per cycle - # 0.04 stalled cycles per insn (74.97%) - 2.464960549 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2808) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe + 7,986,693,869 cycles # 2.858 GHz + 18,657,951,154 instructions # 2.34 insn per cycle + 2.795311494 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1555) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039283704129E-002 +Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.864776e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.022871e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.022871e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.295687 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.713667e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.460081e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.460081e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.696640 sec INFO: No Floating Point Exceptions have been reported - 7,410,311,667 cycles:u # 3.218 GHz (74.99%) - 43,632,900 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.99%) - 940,477,748 stalled-cycles-backend:u # 12.69% backend cycles idle (74.99%) - 14,171,370,593 instructions:u # 1.91 insn per cycle - # 0.07 stalled cycles per insn (74.99%) - 2.306582436 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2251) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe + 7,429,692,675 cycles # 2.750 GHz + 14,251,991,061 instructions # 1.92 insn per cycle + 2.702185959 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053337216261E-002 -Relative difference = 2.601499261602198e-07 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.769386e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.614912e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.614912e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.649318 sec +INFO: No Floating 
Point Exceptions have been reported + 7,300,524,783 cycles # 2.751 GHz + 13,948,352,873 instructions # 1.91 insn per cycle + 2.654703070 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.423538e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.721341e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.721341e+06 ) sec^-1 +MeanMatrixElemValue = 
( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.973336 sec +INFO: No Floating Point Exceptions have been reported + 6,497,241,633 cycles # 2.182 GHz + 13,423,441,698 instructions # 2.07 insn per cycle + 2.978622035 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052562326775E-002 +Relative difference = 1.997440588685788e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 136ba9f47b..3908c86ece 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_16:12:42 +DATE: 2024-05-15_09:09:24 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.920430e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.096494e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.821155e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 -TOTAL : 0.370870 sec +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.310423e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204445e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 2.337304e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.584795 sec INFO: No Floating Point Exceptions have been reported - 947,505,526 cycles:u # 2.410 GHz (75.24%) - 2,145,387 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.04%) - 5,170,736 stalled-cycles-backend:u # 0.55% backend cycles idle (75.27%) - 1,747,059,876 instructions:u # 1.84 insn per cycle - # 0.00 stalled cycles per insn (75.70%) - 0.421525177 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/runTest.exe + 2,287,699,290 cycles # 2.820 GHz + 3,599,028,569 instructions # 1.57 insn per cycle + 0.868441535 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.684357e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.629799e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.629799e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 3.015849 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.136531e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.094682e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.094682e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.319757 sec INFO: No Floating Point Exceptions have been reported - 9,922,200,438 cycles:u # 3.283 GHz (74.92%) - 39,686,948 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.86%) - 29,196,305 stalled-cycles-backend:u # 0.29% backend cycles idle (74.87%) - 28,597,933,127 instructions:u # 2.88 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 3.026797636 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 632) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe + 9,434,042,205 cycles # 2.838 GHz + 25,263,365,170 instructions # 2.68 insn per cycle + 3.325108580 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039838495897E-002 +Relative difference = 1.2589928273811243e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.834973e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.307529e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.307529e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.308332 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.951052e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.482214e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.482214e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.517959 sec INFO: No Floating Point Exceptions have been reported - 7,466,042,626 cycles:u # 3.224 GHz (74.84%) - 40,665,488 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.80%) - 30,876,480 stalled-cycles-backend:u # 0.41% backend cycles idle (74.93%) - 16,825,238,557 instructions:u # 2.25 insn per cycle - # 0.00 stalled cycles per insn (75.09%) - 2.319001698 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2463) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe + 7,205,603,839 cycles # 2.856 GHz + 16,869,214,278 instructions # 2.34 insn per cycle + 2.523300575 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.072999e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.542142e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.542142e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.209399 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.817378e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.753095e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.753095e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.609243 sec INFO: No Floating Point Exceptions have been reported - 7,133,699,605 cycles:u # 3.218 GHz (74.80%) - 41,255,792 stalled-cycles-frontend:u # 0.58% frontend cycles idle (74.94%) - 387,219,994 stalled-cycles-backend:u # 5.43% backend cycles idle (75.10%) - 13,573,007,463 instructions:u # 1.90 insn per cycle - # 0.03 stalled cycles per insn (75.10%) - 2.220337867 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2082) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe + 7,162,444,501 cycles # 2.741 GHz + 13,618,285,483 instructions # 1.90 insn per cycle + 2.614554681 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053331759293E-002 -Relative difference = 2.597245327285885e-07 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.905079e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.003899e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.003899e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.541524 sec +INFO: No Floating 
Point Exceptions have been reported + 7,015,769,981 cycles # 2.756 GHz + 13,432,482,610 instructions # 1.91 insn per cycle + 2.547061446 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.524560e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.960147e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.960147e+06 ) sec^-1 +MeanMatrixElemValue = 
( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.872532 sec +INFO: No Floating Point Exceptions have been reported + 6,324,405,863 cycles # 2.198 GHz + 13,154,939,543 instructions # 2.08 insn per cycle + 2.877932546 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052536860923E-002 +Relative difference = 1.977588895209662e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index c283caeb95..cea92017f4 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_15:54:37 +DATE: 2024-05-15_08:52:02 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.331470e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.111559e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.340159e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.494120 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.804910e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.941947e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.165172e+08 ) sec^-1 +MeanMatrixElemValue = 
( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.666038 sec INFO: No Floating Point Exceptions have been reported - 1,299,598,698 cycles:u # 2.495 GHz (75.10%) - 2,154,035 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.60%) - 5,793,746 stalled-cycles-backend:u # 0.45% backend cycles idle (75.60%) - 2,077,487,131 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (74.95%) - 0.549392203 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/runTest.exe + 2,590,589,949 cycles # 2.854 GHz + 3,955,758,741 instructions # 1.53 insn per cycle + 0.976084714 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590281E-002 -Relative difference = 7.67145406542181e-09 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.243362e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.417639e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.417639e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.783303 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.043340e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.219809e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.219809e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.446471 sec INFO: No Floating Point Exceptions have been reported - 19,561,889,676 cycles:u # 3.376 GHz (75.01%) - 52,819,414 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.01%) - 61,840,098 stalled-cycles-backend:u # 0.32% backend cycles idle (75.01%) - 47,022,800,875 instructions:u # 2.40 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 5.807726502 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 474) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe + 18,657,215,844 cycles # 2.893 GHz + 44,222,139,338 instructions # 2.37 insn per cycle + 6.451661658 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.973198e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.505235e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.505235e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
3.913375 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.637106e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.164315e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.164315e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.261574 sec INFO: No Floating Point Exceptions have been reported - 12,947,018,589 cycles:u # 3.299 GHz (74.97%) - 48,208,744 stalled-cycles-frontend:u # 0.37% frontend cycles idle (74.93%) - 922,788,709 stalled-cycles-backend:u # 7.13% backend cycles idle (74.93%) - 31,064,474,051 instructions:u # 2.40 insn per cycle - # 0.03 stalled cycles per insn (74.94%) - 3.946281546 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1667) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe + 12,336,601,747 cycles # 2.892 GHz + 30,918,015,015 instructions # 2.51 insn per cycle + 4.266662636 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.590540e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.424811e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.424811e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
3.152600 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.948140e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.709136e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.709136e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.645911 sec +INFO: No Floating Point Exceptions have been reported + 10,085,902,604 cycles # 2.763 GHz + 19,374,141,938 instructions # 1.92 insn per cycle + 3.651090079 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2130) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.016172e+06 ) 
sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.836535e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.836535e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.534019 sec INFO: No Floating Point Exceptions have been reported - 10,294,139,229 cycles:u # 3.254 GHz (74.99%) - 49,396,962 stalled-cycles-frontend:u # 0.48% frontend cycles idle (74.96%) - 918,821,242 stalled-cycles-backend:u # 8.93% backend cycles idle (74.96%) - 19,581,766,584 instructions:u # 1.90 insn per cycle - # 0.05 stalled cycles per insn (74.97%) - 3.167170057 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2119) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe + 9,731,602,864 cycles # 2.751 GHz + 18,955,422,118 instructions # 1.95 insn per cycle + 3.539225360 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1860) (512y: 188) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.768535e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.361876e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.361876e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.973258 sec +INFO: No Floating Point Exceptions have been reported + 8,365,805,581 cycles # 2.103 GHz + 15,056,311,694 
instructions # 1.80 insn per cycle + 3.978388965 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1024) (512y: 155) (512z: 1316) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 782ac7bf50..d024b80fdf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_15:54:55 +DATE: 2024-05-15_08:52:32 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.919863e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.585304e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.905943e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.480237 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.800084e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.931545e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.154097e+08 ) sec^-1 +MeanMatrixElemValue = 
( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.666442 sec INFO: No Floating Point Exceptions have been reported - 1,287,175,475 cycles:u # 2.547 GHz (74.78%) - 2,217,592 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.01%) - 5,481,156 stalled-cycles-backend:u # 0.43% backend cycles idle (74.96%) - 2,029,305,527 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (74.73%) - 0.532727801 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/runTest.exe + 2,566,199,127 cycles # 2.850 GHz + 3,940,059,674 instructions # 1.54 insn per cycle + 0.964208878 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590284E-002 -Relative difference = 7.67145379496374e-09 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.311831e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.509898e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.509898e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.519562 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.080432e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.270816e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270816e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.239499 sec INFO: No Floating Point Exceptions have been reported - 18,598,674,944 cycles:u # 3.364 GHz (74.98%) - 52,764,540 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.97%) - 51,457,005 stalled-cycles-backend:u # 0.28% backend cycles idle (74.97%) - 44,694,768,308 instructions:u # 2.40 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 5.533782344 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 498) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe + 17,935,798,795 cycles # 2.873 GHz + 42,467,929,133 instructions # 2.37 insn per cycle + 6.244620252 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.025514e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.586424e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.586424e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
3.827235 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.648560e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.192749e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.192749e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.239099 sec INFO: No Floating Point Exceptions have been reported - 12,669,139,215 cycles:u # 3.301 GHz (74.99%) - 52,486,208 stalled-cycles-frontend:u # 0.41% frontend cycles idle (74.99%) - 835,456,353 stalled-cycles-backend:u # 6.59% backend cycles idle (74.99%) - 30,221,112,857 instructions:u # 2.39 insn per cycle - # 0.03 stalled cycles per insn (74.99%) - 3.841570194 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1650) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe + 12,133,810,960 cycles # 2.860 GHz + 30,225,049,807 instructions # 2.49 insn per cycle + 4.244104452 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.596697e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.455534e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.455534e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
3.195230 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.934430e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.704468e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.704468e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.672897 sec +INFO: No Floating Point Exceptions have been reported + 10,041,877,066 cycles # 2.731 GHz + 19,257,423,409 instructions # 1.92 insn per cycle + 3.678067586 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.017754e+06 ) 
sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.858643e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.858643e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.538767 sec INFO: No Floating Point Exceptions have been reported - 10,281,010,170 cycles:u # 3.228 GHz (74.88%) - 52,319,367 stalled-cycles-frontend:u # 0.51% frontend cycles idle (74.92%) - 245,918,154 stalled-cycles-backend:u # 2.39% backend cycles idle (74.92%) - 18,997,713,463 instructions:u # 1.85 insn per cycle - # 0.01 stalled cycles per insn (75.05%) - 3.226221795 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2072) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe + 9,651,667,831 cycles # 2.725 GHz + 18,756,471,118 instructions # 1.94 insn per cycle + 3.544018799 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1834) (512y: 191) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.771200e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.372716e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.372716e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.971695 sec +INFO: No Floating Point Exceptions have been reported + 8,223,244,431 cycles # 2.068 GHz + 14,980,053,623 
instructions # 1.82 insn per cycle + 3.976869579 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1021) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index bf09e0254f..6c79fc5a8a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building 
in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_15:55:13 +DATE: 2024-05-15_08:53:02 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.775469e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.965485e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.019901e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.369478 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.190319e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181403e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277211e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL 
: 0.521181 sec INFO: No Floating Point Exceptions have been reported - 879,252,502 cycles:u # 2.222 GHz (74.05%) - 2,104,678 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.60%) - 5,252,436 stalled-cycles-backend:u # 0.60% backend cycles idle (75.78%) - 1,347,222,847 instructions:u # 1.53 insn per cycle - # 0.00 stalled cycles per insn (75.87%) - 0.424733256 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest.exe + 2,125,639,390 cycles # 2.821 GHz + 3,088,425,234 instructions # 1.45 insn per cycle + 0.810012096 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 
-EvtsPerSec[Rmb+ME] (23) = ( 2.543325e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.609757e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.609757e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.302634 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.059856e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.120844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.120844e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.198224 sec INFO: No Floating Point Exceptions have been reported - 14,784,201,122 cycles:u # 3.427 GHz (74.98%) - 8,981,005 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.97%) - 3,177,242,207 stalled-cycles-backend:u # 21.49% backend cycles idle (74.97%) - 38,735,076,382 instructions:u # 2.62 insn per cycle - # 0.08 stalled cycles per insn (74.98%) - 4.317845101 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe + 15,010,227,772 cycles # 2.885 GHz + 38,376,294,847 instructions # 2.56 insn per cycle + 5.203572793 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.408116e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.628664e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.628664e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 
3.402993e-03 ) GeV^0 -TOTAL : 2.557856 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.494799e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.685037e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.685037e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.108579 sec INFO: No Floating Point Exceptions have been reported - 8,680,841,989 cycles:u # 3.380 GHz (74.95%) - 10,202,330 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.09%) - 767,677,046 stalled-cycles-backend:u # 8.84% backend cycles idle (75.09%) - 24,310,844,660 instructions:u # 2.80 insn per cycle - # 0.03 stalled cycles per insn (75.09%) - 2.572870820 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe + 9,075,186,083 cycles # 2.916 GHz + 24,580,004,547 instructions # 2.71 insn per cycle + 3.113697316 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.696598e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.291719e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.291719e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.541866 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 5.569480e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.052925e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.052925e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.990969 sec +INFO: No Floating Point Exceptions have been reported + 5,446,723,124 cycles # 2.729 GHz + 11,251,102,203 instructions # 2.07 insn per cycle + 1.996371189 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.155980e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.745118e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 6.745118e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.810758 sec INFO: No Floating Point Exceptions have been reported - 5,114,502,487 cycles:u # 3.292 GHz (74.88%) - 8,727,979 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.78%) - 17,014,329 stalled-cycles-backend:u # 0.33% backend cycles idle (74.79%) - 11,540,817,661 instructions:u # 2.26 insn per cycle - # 0.00 stalled cycles per insn (74.82%) - 1.556824864 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe + 4,934,818,844 cycles # 2.719 GHz + 10,556,993,904 instructions # 2.14 insn per cycle + 1.816016703 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.705103e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.912750e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.912750e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.937812 sec +INFO: No Floating Point Exceptions have been reported + 5,375,981,348 cycles # 1.828 GHz + 7,796,359,488 instructions # 1.45 insn per cycle + 2.943091474 seconds 
time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 8154293ff2..6a936b22f8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,182 +1,231 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:24:26 +DATE: 2024-05-15_09:18:42 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.949994e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.775129e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.775129e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.227514 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.325218e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.872629e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.872629e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.833259 sec INFO: No Floating Point Exceptions have been reported - 3,766,128,203 cycles:u # 2.980 GHz (74.39%) - 38,759,428 stalled-cycles-frontend:u # 1.03% frontend cycles idle (75.02%) - 1,153,146,831 stalled-cycles-backend:u # 30.62% backend cycles idle (75.32%) - 3,874,478,439 instructions:u # 1.03 insn per cycle - # 0.30 stalled cycles per insn (75.27%) - 1.286158558 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest.exe + 3,043,304,670 cycles # 2.825 GHz + 4,749,213,667 instructions # 1.56 insn per cycle + 1.135924684 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.536141e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.601805e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.601805e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.393097 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.030538e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.089970e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.089970e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.351690 sec INFO: No Floating Point Exceptions have been reported - 14,922,234,897 cycles:u # 3.381 GHz (75.00%) - 10,271,324 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.00%) - 3,181,853,965 stalled-cycles-backend:u # 21.32% backend cycles idle (74.99%) - 38,759,660,760 instructions:u # 2.60 insn per cycle - # 0.08 stalled cycles per insn (74.99%) - 4.418072942 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe + 15,328,765,090 cycles # 2.861 GHz + 38,433,892,757 instructions # 
2.51 insn per cycle + 5.358519712 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.389394e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.605750e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.605750e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.656429 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.400584e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.586127e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.586127e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.272002 sec INFO: No Floating Point Exceptions have been reported - 8,847,222,549 cycles:u # 3.306 GHz (74.91%) - 9,795,840 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.91%) - 783,527,311 stalled-cycles-backend:u # 8.86% backend cycles idle (74.91%) - 24,482,823,781 instructions:u # 2.77 insn per cycle - # 0.03 stalled cycles per insn (74.93%) - 2.680334047 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe + 9,380,506,135 cycles # 2.862 GHz + 24,761,479,175 instructions # 2.64 insn per cycle + 3.278737013 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.603640e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.182504e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.182504e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.645689 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.360461e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.815939e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.815939e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.145107 sec INFO: No Floating Point Exceptions have been reported - 5,281,231,320 cycles:u # 3.171 GHz (75.03%) - 9,584,381 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.04%) - 40,941,844 stalled-cycles-backend:u # 0.78% backend cycles idle (75.04%) - 11,783,428,384 instructions:u # 2.23 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 1.669260734 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe + 5,786,206,777 cycles # 2.690 GHz + 11,538,131,025 instructions # 1.99 insn per cycle + 2.151904282 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.933135e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.494047e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.494047e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.957780 sec +INFO: No Floating Point Exceptions have been reported + 5,281,685,483 cycles # 2.689 GHz + 10,843,647,063 instructions # 2.05 insn per cycle + 1.964758338 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.552106e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.743946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.743946e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.145788 sec +INFO: No Floating Point Exceptions have been reported + 5,721,034,997 cycles # 1.815 GHz + 8,037,891,027 instructions # 1.40 insn per cycle + 3.152685384 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 6bd2b91592..e7b745031b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:31:32 +DATE: 2024-05-15_09:29:45 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.751435e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.956627e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.010731e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.590971e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.161887e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277052e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.084285 sec +TOTAL : 0.632801 sec INFO: No Floating Point 
Exceptions have been reported - 3,225,658,717 cycles:u # 2.977 GHz (74.93%) - 27,364,800 stalled-cycles-frontend:u # 0.85% frontend cycles idle (74.97%) - 1,146,210,651 stalled-cycles-backend:u # 35.53% backend cycles idle (74.95%) - 2,995,970,052 instructions:u # 0.93 insn per cycle - # 0.38 stalled cycles per insn (74.96%) - 1.131832669 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest.exe + 2,472,079,288 cycles # 2.821 GHz + 3,594,328,826 instructions # 1.45 insn per cycle + 0.934637105 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.538537e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.604840e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.604840e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.044246e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.104768e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.104768e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.308650 sec +TOTAL : 5.300004 sec INFO: No Floating Point Exceptions have been reported - 14,804,013,808 cycles:u # 3.428 GHz (74.99%) - 9,582,136 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.99%) - 3,203,914,389 stalled-cycles-backend:u # 21.64% backend cycles idle (74.99%) - 38,731,686,667 instructions:u # 2.62 insn per cycle - # 0.08 stalled cycles per insn (75.00%) - 4.319924242 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe + 15,168,137,122 cycles # 2.860 GHz + 38,391,021,057 instructions # 2.53 insn per cycle + 5.305606427 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.415669e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.638907e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.638907e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.433972e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.620984e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.620984e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.560539 sec +TOTAL : 3.223387 sec INFO: No Floating Point Exceptions have been reported - 8,700,534,897 cycles:u # 3.382 GHz (74.89%) - 8,863,726 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.04%) - 770,913,873 stalled-cycles-backend:u # 8.86% backend cycles idle (75.12%) - 24,348,483,963 instructions:u # 2.80 insn per cycle - # 0.03 stalled cycles per insn (75.12%) - 2.574063419 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe + 9,242,671,203 cycles # 2.863 GHz + 24,577,390,932 instructions # 2.66 insn per cycle + 3.228925344 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.694312e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.299185e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.299185e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.369807e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.835324e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
5.835324e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.125214 sec +INFO: No Floating Point Exceptions have been reported + 5,666,161,085 cycles # 2.661 GHz + 11,235,267,252 instructions # 1.98 insn per cycle + 2.130780371 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.066147e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.655529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.655529e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.546948 sec +TOTAL : 1.900125 
sec INFO: No Floating Point Exceptions have been reported - 5,119,500,672 cycles:u # 3.283 GHz (74.87%) - 8,490,600 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.88%) - 15,827,668 stalled-cycles-backend:u # 0.31% backend cycles idle (74.88%) - 11,539,261,865 instructions:u # 2.25 insn per cycle - # 0.00 stalled cycles per insn (74.86%) - 1.572322821 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe + 5,120,318,153 cycles # 2.688 GHz + 10,506,045,544 instructions # 2.05 insn per cycle + 1.905757522 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.596945e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.792278e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.792278e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.086982 sec +INFO: No Floating Point Exceptions have been reported + 5,549,243,721 cycles # 1.795 GHz + 7,742,811,855 instructions # 1.40 insn per cycle + 3.092448899 
seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index dee74b693c..663cad214e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,176 +1,220 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:29:22 +DATE: 2024-05-15_09:24:14 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.803717e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.946630e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.004336e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.170710 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.635282e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.156849e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273055e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.725039 sec INFO: No Floating Point Exceptions have been reported - 3,658,150,944 cycles:u # 3.038 GHz (74.90%) - 37,981,899 stalled-cycles-frontend:u # 1.04% frontend cycles idle (74.67%) - 1,144,066,478 stalled-cycles-backend:u # 31.27% backend cycles idle (74.61%) - 3,900,221,248 instructions:u # 1.07 insn per cycle - # 0.29 stalled cycles per insn (74.78%) - 1.221748818 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest.exe + 2,706,506,365 cycles # 2.828 GHz + 4,257,792,546 instructions # 1.57 insn per cycle + 1.013725794 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.542224e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.608649e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.608649e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.303981 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.044850e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.105564e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.105564e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.239087 sec INFO: No Floating Point Exceptions have been reported - 14,799,335,844 cycles:u # 3.430 GHz (74.97%) - 9,368,179 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.97%) - 3,188,931,778 stalled-cycles-backend:u # 21.55% backend cycles idle (74.97%) - 38,751,756,847 instructions:u # 2.62 insn per cycle - # 0.08 stalled cycles per insn (74.97%) - 4.315796772 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe + 15,006,948,586 cycles # 2.862 GHz + 38,376,182,709 instructions # 2.56 insn per cycle + 5.244562024 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.419387e+05 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.638360e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.638360e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.551312 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.431762e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.619227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.619227e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.165721 sec INFO: No Floating Point Exceptions have been reported - 8,648,533,390 cycles:u # 3.376 GHz (75.02%) - 9,892,072 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.02%) - 768,203,756 stalled-cycles-backend:u # 8.88% backend cycles idle (75.02%) - 24,374,130,896 instructions:u # 2.82 insn per cycle - # 0.03 stalled cycles per insn (75.02%) - 2.562833032 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe + 9,066,732,600 cycles # 2.860 GHz + 24,579,797,002 instructions # 2.71 insn per cycle + 3.171274433 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.676900e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.271425e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.271425e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.546104 sec +OMP threads / `nproc --all` 
= 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.432440e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.902788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.902788e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.040399 sec +INFO: No Floating Point Exceptions have been reported + 5,447,679,007 cycles # 2.665 GHz + 11,252,372,098 instructions # 2.07 insn per cycle + 2.046086163 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.015340e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.590774e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 6.590774e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.852256 sec INFO: No Floating Point Exceptions have been reported - 5,132,958,800 cycles:u # 3.297 GHz (74.83%) - 9,334,961 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.84%) - 25,693,530 stalled-cycles-backend:u # 0.50% backend cycles idle (74.84%) - 11,563,000,912 instructions:u # 2.25 insn per cycle - # 0.00 stalled cycles per insn (74.88%) - 1.562529164 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe + 4,964,036,673 cycles # 2.673 GHz + 10,558,519,067 instructions # 2.13 insn per cycle + 1.857802220 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.586047e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.783729e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783729e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.033201 sec +INFO: No Floating Point Exceptions have been reported + 5,388,777,143 cycles # 1.774 GHz + 7,793,579,080 instructions # 1.45 insn per cycle + 3.038681843 
seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index c6a49a35fb..4514426c62 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_15:55:26 +DATE: 2024-05-15_08:53:26 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.855896e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.923148e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.976161e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.369294 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.195613e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.184382e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280839e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL 
: 0.523488 sec INFO: No Floating Point Exceptions have been reported - 874,412,714 cycles:u # 2.211 GHz (75.09%) - 2,032,981 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.81%) - 5,400,709 stalled-cycles-backend:u # 0.62% backend cycles idle (76.16%) - 1,373,590,793 instructions:u # 1.57 insn per cycle - # 0.00 stalled cycles per insn (74.85%) - 0.421181023 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest.exe + 2,155,707,026 cycles # 2.861 GHz + 3,121,946,682 instructions # 1.45 insn per cycle + 0.811708595 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 
-EvtsPerSec[Rmb+ME] (23) = ( 2.454763e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.516817e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.516817e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.450589 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.040424e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.100582e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.100582e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.248272 sec INFO: No Floating Point Exceptions have been reported - 15,338,008,426 cycles:u # 3.437 GHz (74.93%) - 9,263,818 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.91%) - 91,044,693 stalled-cycles-backend:u # 0.59% backend cycles idle (74.95%) - 39,502,158,984 instructions:u # 2.58 insn per cycle - # 0.00 stalled cycles per insn (75.04%) - 4.466196489 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 596) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe + 15,038,247,077 cycles # 2.864 GHz + 40,101,434,918 instructions # 2.67 insn per cycle + 5.253563437 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.420771e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.639726e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.639726e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.551384 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 3.594771e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.800635e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.800635e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.025777 sec INFO: No Floating Point Exceptions have been reported - 8,653,309,782 cycles:u # 3.377 GHz (75.02%) - 10,341,757 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.03%) - 1,240,769,587 stalled-cycles-backend:u # 14.34% backend cycles idle (75.03%) - 23,497,288,002 instructions:u # 2.72 insn per cycle - # 0.05 stalled cycles per insn (75.03%) - 2.566849143 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1948) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe + 8,676,513,577 cycles # 2.863 GHz + 23,671,232,753 instructions # 2.73 insn per cycle + 3.030990589 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2072) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.942598e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.421256e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.421256e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.690103 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 4.852634e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.223067e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.223067e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.269618 sec +INFO: No Floating Point Exceptions have been reported + 6,075,123,959 cycles # 2.672 GHz + 13,062,379,699 instructions # 2.15 insn per cycle + 2.274773997 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.136902e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.549699e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 5.549699e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.149296 sec INFO: No Floating Point Exceptions have been reported - 5,631,807,081 cycles:u # 3.310 GHz (74.95%) - 9,115,540 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.08%) - 769,964,712 stalled-cycles-backend:u # 13.67% backend cycles idle (75.08%) - 13,135,699,077 instructions:u # 2.33 insn per cycle - # 0.06 stalled cycles per insn (75.08%) - 1.705406499 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2560) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe + 5,777,124,671 cycles # 2.683 GHz + 12,323,213,633 instructions # 2.13 insn per cycle + 2.154509165 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2093) (512y: 294) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.294251e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.457758e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.457758e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.292168 sec +INFO: No Floating Point Exceptions have been reported + 5,808,964,628 cycles # 1.762 GHz + 9,602,534,488 instructions # 1.65 insn per cycle + 3.297534438 seconds 
time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1971) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 81d64ef62d..39eef86b7e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:12:54 +DATE: 2024-05-15_09:09:46 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.860003e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.960397e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.014556e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.367937 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.658259e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.170216e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280112e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL 
: 0.533646 sec INFO: No Floating Point Exceptions have been reported - 868,560,474 cycles:u # 2.222 GHz (75.41%) - 2,087,259 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.63%) - 4,704,932 stalled-cycles-backend:u # 0.54% backend cycles idle (75.48%) - 1,382,708,731 instructions:u # 1.59 insn per cycle - # 0.00 stalled cycles per insn (75.04%) - 0.419779048 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/runTest.exe + 2,162,169,608 cycles # 2.818 GHz + 3,108,077,231 instructions # 1.44 insn per cycle + 0.826343650 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 
-EvtsPerSec[Rmb+ME] (23) = ( 2.849959e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.933619e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.933619e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.858026 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.369156e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.450097e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.450097e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.534956 sec INFO: No Floating Point Exceptions have been reported - 13,230,268,308 cycles:u # 3.420 GHz (75.00%) - 9,196,867 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.98%) - 1,450,173,701 stalled-cycles-backend:u # 10.96% backend cycles idle (74.98%) - 35,846,754,718 instructions:u # 2.71 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 3.872756359 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1078) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe + 12,994,240,018 cycles # 2.862 GHz + 34,384,924,238 instructions # 2.65 insn per cycle + 4.540545327 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.440638e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.661874e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.661874e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.540476 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 2.920401e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.054397e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.054397e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.700366 sec INFO: No Floating Point Exceptions have been reported - 8,617,423,314 cycles:u # 3.377 GHz (74.92%) - 9,742,559 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.92%) - 2,419,994,807 stalled-cycles-backend:u # 28.08% backend cycles idle (74.93%) - 21,899,760,348 instructions:u # 2.54 insn per cycle - # 0.11 stalled cycles per insn (74.93%) - 2.554989772 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2334) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe + 10,609,557,944 cycles # 2.864 GHz + 24,007,443,501 instructions # 2.26 insn per cycle + 3.706077406 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.697071e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.140586e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.140586e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.745335 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 4.424475e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.729374e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.729374e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.479913 sec INFO: No Floating Point Exceptions have been reported - 5,811,955,870 cycles:u # 3.309 GHz (74.99%) - 8,545,914 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.95%) - 1,902,564,136 stalled-cycles-backend:u # 32.74% backend cycles idle (74.95%) - 12,067,881,535 instructions:u # 2.08 insn per cycle - # 0.16 stalled cycles per insn (74.96%) - 1.759875043 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3062) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe + 6,622,429,147 cycles # 2.665 GHz + 12,401,540,743 instructions # 1.87 insn per cycle + 2.485646820 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3154) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 +Avg ME (F77/C++) = 2.0288063388516200 +Relative difference = 3.2588037208240405e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.642745e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.976938e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.976938e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.368068 sec +INFO: No Floating Point Exceptions have been reported + 6,341,758,267 cycles # 2.674 GHz + 11,573,752,166 instructions # 1.83 insn per cycle + 2.373609454 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2690) (512y: 239) 
(512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516200 +Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.624072e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.822346e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.822346e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.003083 sec +INFO: No Floating Point Exceptions have been reported + 5,336,365,120 cycles # 1.774 GHz + 9,296,649,977 instructions # 1.74 insn per cycle + 3.008817721 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2: 2115) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 30bb9dc816..f2377a3c10 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done 
for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:13:07 +DATE: 2024-05-15_09:10:10 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.855524e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.918252e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.971224e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.365301 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.654323e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169281e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279730e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL 
: 0.531724 sec INFO: No Floating Point Exceptions have been reported - 875,261,092 cycles:u # 2.238 GHz (74.74%) - 2,145,696 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.52%) - 4,783,588 stalled-cycles-backend:u # 0.55% backend cycles idle (75.37%) - 1,404,640,435 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (75.93%) - 0.416048016 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/runTest.exe + 2,163,666,189 cycles # 2.813 GHz + 3,097,903,315 instructions # 1.43 insn per cycle + 0.825701854 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 
-EvtsPerSec[Rmb+ME] (23) = ( 3.240552e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.349765e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.349765e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.415620 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.495064e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.585961e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.585961e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.312328 sec INFO: No Floating Point Exceptions have been reported - 11,671,621,494 cycles:u # 3.406 GHz (75.03%) - 9,251,807 stalled-cycles-frontend:u # 0.08% frontend cycles idle (75.03%) - 68,281,414 stalled-cycles-backend:u # 0.59% backend cycles idle (75.03%) - 35,744,496,973 instructions:u # 3.06 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 3.431955897 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 469) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe + 12,359,534,522 cycles # 2.863 GHz + 35,038,045,562 instructions # 2.83 insn per cycle + 4.317909347 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.823848e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.086401e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.086401e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.352281 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 2.901600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.035182e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.035182e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.723663 sec INFO: No Floating Point Exceptions have been reported - 7,949,427,078 cycles:u # 3.364 GHz (74.95%) - 9,527,461 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.95%) - 1,447,684,678 stalled-cycles-backend:u # 18.21% backend cycles idle (74.96%) - 21,256,961,664 instructions:u # 2.67 insn per cycle - # 0.07 stalled cycles per insn (74.97%) - 2.367088619 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2088) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe + 10,672,478,736 cycles # 2.862 GHz + 23,084,196,191 instructions # 2.16 insn per cycle + 3.729445597 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.971872e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.610345e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.610345e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.493454 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 4.789631e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.152428e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.152428e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.298581 sec +INFO: No Floating Point Exceptions have been reported + 6,165,005,368 cycles # 2.677 GHz + 11,957,773,725 instructions # 1.94 insn per cycle + 2.304298716 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2509) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.925515e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.303447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 5.303447e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.238206 sec INFO: No Floating Point Exceptions have been reported - 4,944,281,841 cycles:u # 3.286 GHz (75.01%) - 9,685,586 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.01%) - 190,981,785 stalled-cycles-backend:u # 3.86% backend cycles idle (75.01%) - 11,368,416,061 instructions:u # 2.30 insn per cycle - # 0.02 stalled cycles per insn (75.03%) - 1.507919605 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2370) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe + 5,999,491,603 cycles # 2.675 GHz + 11,131,162,062 instructions # 1.86 insn per cycle + 2.243685672 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2126) (512y: 174) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.712662e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.921795e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.921795e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.934330 sec +INFO: No Floating Point Exceptions have been reported + 5,221,479,602 cycles # 1.777 GHz + 9,021,223,339 instructions # 1.73 insn per cycle + 2.939902314 seconds 
time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1650) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index d02c785b32..21615bfbd9 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_15:55:40 +DATE: 2024-05-15_08:53:50 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.691037e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.922898e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.084230e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) GeV^0 -TOTAL : 0.316605 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.087934e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.711966e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.976794e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 
0.479482 sec INFO: No Floating Point Exceptions have been reported - 791,598,853 cycles:u # 2.324 GHz (74.51%) - 2,217,246 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.32%) - 4,488,513 stalled-cycles-backend:u # 0.57% backend cycles idle (73.63%) - 1,374,860,279 instructions:u # 1.74 insn per cycle - # 0.00 stalled cycles per insn (73.75%) - 0.365544914 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest.exe + 1,991,295,837 cycles # 2.814 GHz + 2,882,362,006 instructions # 1.45 insn per cycle + 0.764140602 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.996036e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.086137e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.086137e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.645410 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.185399e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.256844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.256844e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.884968 sec INFO: No Floating Point Exceptions have been reported - 12,564,694,817 cycles:u # 3.440 GHz (75.03%) - 7,266,320 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.03%) - 1,267,318,331 stalled-cycles-backend:u # 10.09% backend cycles idle (75.03%) - 37,032,357,445 instructions:u # 2.95 insn per cycle - # 0.03 stalled cycles per insn (75.03%) - 3.656882243 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe + 13,998,763,126 cycles # 2.864 GHz + 38,344,277,820 instructions # 2.74 insn per cycle + 4.889885817 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198367925361 -Relative difference = 8.044452636897417e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.141600e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.553169e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.553169e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 
3.404713e-03 ) GeV^0 -TOTAL : 1.851793 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.748933e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.130007e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.130007e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.296210 sec INFO: No Floating Point Exceptions have been reported - 6,282,909,767 cycles:u # 3.379 GHz (75.06%) - 6,700,989 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.05%) - 2,244,994,692 stalled-cycles-backend:u # 35.73% backend cycles idle (75.05%) - 15,180,893,512 instructions:u # 2.42 insn per cycle - # 0.15 stalled cycles per insn (75.05%) - 1.863223677 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe + 6,436,971,327 cycles # 2.798 GHz + 15,817,314,373 instructions # 2.46 insn per cycle + 2.301153141 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198773050681 -Relative difference = 6.047600673895608e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.211586e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.365404e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.365404e+06 ) sec^-1 
-MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.010286 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.801045e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.008233e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.008233e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.278323 sec INFO: No Floating Point Exceptions have been reported - 3,328,108,462 cycles:u # 3.269 GHz (74.87%) - 9,577,106 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.86%) - 1,110,990,485 stalled-cycles-backend:u # 33.38% backend cycles idle (74.85%) - 7,706,706,733 instructions:u # 2.32 insn per cycle - # 0.14 stalled cycles per insn (74.89%) - 1.021544489 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe + 3,450,636,880 cycles # 2.691 GHz + 7,594,398,635 instructions # 2.20 insn per cycle + 1.283271260 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186294492334 -Relative difference = 1.826435805832187e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.432847e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092185e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092185e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.198755 sec +INFO: No Floating Point Exceptions have been 
reported + 3,240,338,452 cycles # 2.694 GHz + 7,202,058,966 instructions # 2.22 insn per cycle + 1.203680701 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.702154e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.416282e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.416282e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.651897 sec +INFO: No 
Floating Point Exceptions have been reported + 3,048,638,509 cycles # 1.842 GHz + 5,835,754,619 instructions # 1.91 insn per cycle + 1.657060523 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 576ba54bb8..140c2bd43a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,182 +1,231 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:24:40 +DATE: 2024-05-15_09:19:06 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.446100e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.088052e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.088052e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 -TOTAL : 1.145963 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.780357e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.391628e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.391628e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.689034 sec INFO: No Floating Point Exceptions have been reported - 3,556,692,327 cycles:u # 3.028 GHz (75.04%) - 21,296,336 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.87%) - 1,141,762,983 stalled-cycles-backend:u # 32.10% backend cycles idle (74.70%) - 3,938,994,900 instructions:u # 1.11 insn per cycle - # 0.29 stalled cycles per insn (74.66%) - 1.199694980 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest.exe + 2,591,002,919 cycles # 2.825 GHz + 3,986,567,081 instructions # 1.54 insn per cycle + 0.976058283 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.986790e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.076717e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.076717e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.695930 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.176806e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.247940e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.247940e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.947848 sec INFO: No Floating Point Exceptions have been reported - 12,667,992,373 cycles:u # 3.418 GHz (75.01%) - 8,515,560 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.97%) - 1,269,480,757 stalled-cycles-backend:u # 10.02% backend cycles idle (74.97%) - 37,129,718,188 instructions:u # 2.93 insn per cycle - # 0.03 stalled cycles per insn (74.97%) - 3.710289134 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe + 14,177,339,083 cycles # 2.862 GHz + 38,384,281,500 instructions # 
2.71 insn per cycle + 4.954293026 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198367925361 -Relative difference = 8.044452636897417e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.111745e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.510250e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.510250e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.923322 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.805267e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.201006e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.201006e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.318235 sec INFO: No Floating Point Exceptions have been reported - 6,419,461,019 cycles:u # 3.319 GHz (74.79%) - 7,200,229 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.78%) - 2,253,441,771 stalled-cycles-backend:u # 35.10% backend cycles idle (74.99%) - 15,444,957,018 instructions:u # 2.41 insn per cycle - # 0.15 stalled cycles per insn (75.18%) - 1.937561802 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe + 6,645,221,660 cycles # 2.860 GHz + 16,095,917,759 instructions # 2.42 insn per cycle + 2.324561734 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198773050681 -Relative difference = 6.047600673895608e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.207790e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.361965e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.361965e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.057264 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.666331e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.908244e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.908244e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.344653 sec INFO: No Floating Point Exceptions have been reported - 3,433,393,334 cycles:u # 3.215 GHz (74.64%) - 7,369,251 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.57%) - 1,103,961,653 stalled-cycles-backend:u # 32.15% backend cycles idle (74.94%) - 7,843,923,093 instructions:u # 2.28 insn per cycle - # 0.14 stalled cycles per insn (75.29%) - 1.071719104 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe + 3,645,388,480 cycles # 2.700 GHz + 7,830,837,428 instructions # 2.15 insn per cycle + 1.351075698 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186294492334 -Relative difference = 1.826435805832187e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.154300e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.057045e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.057045e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.280564 sec +INFO: No Floating Point Exceptions have been reported + 3,432,046,758 cycles # 2.669 GHz + 7,439,136,566 instructions # 2.17 insn per cycle + 1.286864532 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.590499e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.279613e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.279613e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.727906 sec +INFO: No Floating Point Exceptions have been reported + 3,258,297,245 cycles # 1.880 GHz + 6,091,097,760 instructions # 1.87 insn per cycle + 1.734344558 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 062bed307f..f941d4d189 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:31:46 +DATE: 2024-05-15_09:30:09 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.572863e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.910914e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.070655e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.080169e+00 +- 3.463853e-03 ) GeV^0 -TOTAL : 1.010993 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.441294e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.648540e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.972779e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079446e+00 +- 
3.403306e-03 ) GeV^0 +TOTAL : 0.577869 sec INFO: No Floating Point Exceptions have been reported - 3,134,574,677 cycles:u # 3.018 GHz (74.53%) - 10,844,566 stalled-cycles-frontend:u # 0.35% frontend cycles idle (74.60%) - 1,149,701,699 stalled-cycles-backend:u # 36.68% backend cycles idle (74.86%) - 2,833,944,222 instructions:u # 0.90 insn per cycle - # 0.41 stalled cycles per insn (75.33%) - 1.064507635 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest.exe + 2,259,987,894 cycles # 2.820 GHz + 3,318,785,216 instructions # 1.47 insn per cycle + 0.858245386 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = 
( 2.994584e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.084810e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.084810e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.185586e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.257256e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.257256e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.651032 sec +TOTAL : 4.942195 sec INFO: No Floating Point Exceptions have been reported - 12,626,581,938 cycles:u # 3.451 GHz (74.87%) - 8,210,222 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.89%) - 1,262,658,310 stalled-cycles-backend:u # 10.00% backend cycles idle (74.99%) - 36,980,093,862 instructions:u # 2.93 insn per cycle - # 0.03 stalled cycles per insn (75.08%) - 3.660540982 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe + 14,162,316,219 cycles # 2.863 GHz + 38,370,272,145 instructions # 2.71 insn per cycle + 4.947449358 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198367925361 -Relative difference = 8.044452636897417e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.361901e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.796195e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.796195e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.796173 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.853985e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.254344e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.254344e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 2.306547 sec INFO: No Floating Point Exceptions have been reported - 6,122,999,769 cycles:u # 3.394 GHz (74.63%) - 6,731,474 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.05%) - 2,127,642,873 stalled-cycles-backend:u # 34.75% backend cycles idle (75.17%) - 15,142,149,977 instructions:u # 2.47 insn per cycle - # 0.14 stalled cycles per insn (75.17%) - 1.805751630 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe + 6,614,179,097 cycles # 2.862 GHz + 15,829,353,273 instructions # 2.39 insn per cycle + 2.311880085 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198773050681 -Relative difference = 6.047600673895608e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.218260e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.373989e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.373989e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 
-TOTAL : 1.009702 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.781038e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.006761e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.006761e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.338691 sec INFO: No Floating Point Exceptions have been reported - 3,305,189,551 cycles:u # 3.250 GHz (74.85%) - 6,923,299 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.84%) - 1,100,547,103 stalled-cycles-backend:u # 33.30% backend cycles idle (74.84%) - 7,685,395,377 instructions:u # 2.33 insn per cycle - # 0.14 stalled cycles per insn (74.88%) - 1.019465527 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe + 3,618,619,626 cycles # 2.694 GHz + 7,578,518,232 instructions # 2.09 insn per cycle + 1.343966759 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186294492334 -Relative difference = 1.826435805832187e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.447372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.094056e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.094056e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.255442 sec +INFO: No Floating Point Exceptions 
have been reported + 3,405,820,822 cycles # 2.703 GHz + 7,152,441,430 instructions # 2.10 insn per cycle + 1.260812980 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.714568e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.430604e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.430604e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 
1.707472 sec +INFO: No Floating Point Exceptions have been reported + 3,213,810,412 cycles # 1.877 GHz + 5,785,708,337 instructions # 1.80 insn per cycle + 1.712770365 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 24ddfc0d54..a158958f69 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,176 +1,220 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:29:36 +DATE: 2024-05-15_09:24:38 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.140243e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.867331e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.041352e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 -TOTAL : 1.388830 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.492616e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.639082e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.961805e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.626891 sec INFO: No Floating Point Exceptions have been reported - 3,543,456,227 cycles:u # 3.058 GHz (75.15%) - 22,706,543 stalled-cycles-frontend:u # 0.64% frontend cycles idle (75.07%) - 1,153,445,105 stalled-cycles-backend:u # 32.55% backend cycles idle (74.96%) - 3,723,010,563 instructions:u # 1.05 insn per cycle - # 0.31 stalled cycles per insn (75.42%) - 1.449467945 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest.exe + 2,398,364,796 cycles # 2.824 GHz + 3,748,950,340 instructions # 1.56 insn per cycle + 0.905670831 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.990535e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.080344e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.080344e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.652431 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.185825e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.257444e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.257444e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.885821 sec INFO: No Floating Point Exceptions have been reported - 12,642,355,107 cycles:u # 3.456 GHz (74.87%) - 8,841,825 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.89%) - 1,259,246,631 stalled-cycles-backend:u # 9.96% backend cycles idle (74.99%) - 36,993,375,891 instructions:u # 2.93 insn per cycle - # 0.03 stalled cycles per insn (75.07%) - 3.660280036 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe + 13,998,111,113 cycles # 2.863 GHz + 38,343,247,328 instructions # 2.74 insn per cycle + 4.891063105 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198367925361 -Relative difference = 8.044452636897417e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.359128e+05 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.802679e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.802679e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.793583 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.683089e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.053456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.053456e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.328350 sec INFO: No Floating Point Exceptions have been reported - 6,111,767,563 cycles:u # 3.396 GHz (74.81%) - 6,718,217 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.02%) - 2,130,349,767 stalled-cycles-backend:u # 34.86% backend cycles idle (75.11%) - 15,164,888,939 instructions:u # 2.48 insn per cycle - # 0.14 stalled cycles per insn (75.11%) - 1.801108158 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe + 6,446,914,878 cycles # 2.764 GHz + 15,817,217,969 instructions # 2.45 insn per cycle + 2.333673502 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198773050681 -Relative difference = 6.047600673895608e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.221926e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.379276e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.379276e+06 ) 
sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.004314 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.620606e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.875671e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.875671e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.305275 sec INFO: No Floating Point Exceptions have been reported - 3,308,642,232 cycles:u # 3.272 GHz (74.70%) - 7,145,983 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.68%) - 1,106,626,180 stalled-cycles-backend:u # 33.45% backend cycles idle (74.71%) - 7,715,445,054 instructions:u # 2.33 insn per cycle - # 0.14 stalled cycles per insn (74.87%) - 1.012317854 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe + 3,464,948,943 cycles # 2.645 GHz + 7,594,902,602 instructions # 2.19 insn per cycle + 1.310883359 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186294492334 -Relative difference = 1.826435805832187e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.424530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.091692e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.091692e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.200978 sec +INFO: No Floating Point Exceptions 
have been reported + 3,245,600,515 cycles # 2.692 GHz + 7,202,294,627 instructions # 2.22 insn per cycle + 1.206324794 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.714096e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.429171e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.429171e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 
1.650070 sec +INFO: No Floating Point Exceptions have been reported + 3,052,618,225 cycles # 1.845 GHz + 5,835,036,219 instructions # 1.91 insn per cycle + 1.655525339 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 6a76e7f32a..b53b6ca217 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_15:55:51 +DATE: 2024-05-15_08:54:09 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.030559e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.087069e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.266399e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) GeV^0 -TOTAL : 0.315447 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.094541e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.761342e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.038400e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 
0.480529 sec INFO: No Floating Point Exceptions have been reported - 781,515,701 cycles:u # 2.305 GHz (74.21%) - 2,282,387 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.66%) - 4,790,131 stalled-cycles-backend:u # 0.61% backend cycles idle (75.47%) - 1,375,233,761 instructions:u # 1.76 insn per cycle - # 0.00 stalled cycles per insn (74.46%) - 0.365017431 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest.exe + 2,029,736,118 cycles # 2.841 GHz + 2,894,990,051 instructions # 1.43 insn per cycle + 0.771583749 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.980055e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.069168e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.069168e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.663191 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.174936e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.244771e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.244771e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.908019 sec INFO: No Floating Point Exceptions have been reported - 12,652,429,315 cycles:u # 3.447 GHz (74.99%) - 7,340,081 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) - 8,136,328 stalled-cycles-backend:u # 0.06% backend cycles idle (74.94%) - 37,447,429,884 instructions:u # 2.96 insn per cycle - # 0.00 stalled cycles per insn (74.95%) - 3.674903820 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 503) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe + 14,301,494,173 cycles # 2.912 GHz + 39,833,565,583 instructions # 2.79 insn per cycle + 4.913066260 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198367925361 -Relative difference = 8.044452636897417e-08 +Avg ME (F77/C++) = 2.0288199028000236 +Relative difference = 4.790961076489297e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.327850e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.919361e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.919361e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 
3.404713e-03 ) GeV^0 -TOTAL : 1.574158 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.740661e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.295355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.295355e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 1.913985 sec INFO: No Floating Point Exceptions have been reported - 5,334,856,873 cycles:u # 3.373 GHz (74.76%) - 6,903,179 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.73%) - 1,392,837,612 stalled-cycles-backend:u # 26.11% backend cycles idle (74.91%) - 15,200,021,814 instructions:u # 2.85 insn per cycle - # 0.09 stalled cycles per insn (75.15%) - 1.585770741 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2330) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe + 5,573,020,624 cycles # 2.905 GHz + 15,285,382,071 instructions # 2.74 insn per cycle + 1.918973539 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2474) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198773050681 -Relative difference = 6.047600673895608e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 8.856882e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.651769e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.651769e+05 ) sec^-1 
-MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.328611 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.280958e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.903297e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.903297e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.755267 sec INFO: No Floating Point Exceptions have been reported - 4,447,476,075 cycles:u # 3.328 GHz (74.87%) - 8,419,190 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.86%) - 1,668,128,471 stalled-cycles-backend:u # 37.51% backend cycles idle (74.86%) - 9,858,565,170 instructions:u # 2.22 insn per cycle - # 0.17 stalled cycles per insn (74.88%) - 1.340175033 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3750) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe + 4,809,719,961 cycles # 2.734 GHz + 9,735,072,586 instructions # 2.02 insn per cycle + 1.760266597 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3708) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186428369954 -Relative difference = 1.7604478492421832e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182108197361 +Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.569354e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.248087e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.248087e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.681540 sec +INFO: No Floating Point Exceptions have 
been reported + 4,617,673,944 cycles # 2.739 GHz + 9,326,385,797 instructions # 2.02 insn per cycle + 1.686673769 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3496) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182108197361 +Relative difference = 1.0391259163456515e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.608630e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.095881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.095881e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.956075 sec +INFO: 
No Floating Point Exceptions have been reported + 3,655,224,280 cycles # 1.865 GHz + 7,036,072,202 instructions # 1.92 insn per cycle + 1.961051327 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2610) (512y: 12) (512z: 2220) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183459779248 +Relative difference = 1.7053177021099307e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index b333e48464..04d36cf6de 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 
1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:13:19 +DATE: 2024-05-15_09:10:34 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.852829e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.914888e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.075023e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) GeV^0 -TOTAL : 0.315754 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.477193e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.654658e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.976661e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 
0.489832 sec INFO: No Floating Point Exceptions have been reported - 791,616,602 cycles:u # 2.344 GHz (74.09%) - 2,195,721 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.16%) - 4,345,045 stalled-cycles-backend:u # 0.55% backend cycles idle (73.35%) - 1,344,792,490 instructions:u # 1.70 insn per cycle - # 0.00 stalled cycles per insn (74.12%) - 0.372298758 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/runTest.exe + 2,011,185,622 cycles # 2.813 GHz + 2,828,692,272 instructions # 1.41 insn per cycle + 0.773489784 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.218298e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.322486e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.322486e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.403136 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.401235e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.488036e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.488036e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.455338 sec INFO: No Floating Point Exceptions have been reported - 11,740,679,609 cycles:u # 3.442 GHz (74.91%) - 7,350,250 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.80%) - 10,967,997 stalled-cycles-backend:u # 0.09% backend cycles idle (74.88%) - 34,167,335,720 instructions:u # 2.91 insn per cycle - # 0.00 stalled cycles per insn (75.11%) - 3.414881992 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 768) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe + 12,589,426,076 cycles # 2.823 GHz + 34,372,775,517 instructions # 2.73 insn per cycle + 4.460794985 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199088536203 -Relative difference = 4.4925808981097166e-08 +Avg ME (F77/C++) = 2.0288199094356969 +Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.316927e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.900595e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.900595e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 
3.404713e-03 ) GeV^0 -TOTAL : 1.577815 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.156094e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.608144e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.608144e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.122636 sec INFO: No Floating Point Exceptions have been reported - 5,328,150,452 cycles:u # 3.361 GHz (74.84%) - 6,566,781 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.77%) - 2,195,911,479 stalled-cycles-backend:u # 41.21% backend cycles idle (74.79%) - 14,603,452,114 instructions:u # 2.74 insn per cycle - # 0.15 stalled cycles per insn (74.91%) - 1.588843890 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2947) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe + 6,090,571,230 cycles # 2.863 GHz + 14,860,717,951 instructions # 2.44 insn per cycle + 2.128007707 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198769558221 -Relative difference = 6.06481491495597e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193803280592 +Relative difference = 1.8746278463897685e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.452010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.036286e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.036286e+06 ) sec^-1 
-MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.253909 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.999467e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.786326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.786326e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.586058 sec INFO: No Floating Point Exceptions have been reported - 4,221,700,211 cycles:u # 3.347 GHz (74.54%) - 7,836,775 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.66%) - 1,638,737,482 stalled-cycles-backend:u # 38.82% backend cycles idle (74.93%) - 9,052,929,189 instructions:u # 2.14 insn per cycle - # 0.18 stalled cycles per insn (75.25%) - 1.265197675 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4501) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe + 4,262,406,214 cycles # 2.680 GHz + 9,028,780,676 instructions # 2.12 insn per cycle + 1.591291867 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4443) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186752004549 -Relative difference = 1.6009291367898262e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181999931112 +Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.135753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.962770e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.962770e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.556995 sec +INFO: No Floating Point Exceptions have 
been reported + 4,192,442,485 cycles # 2.685 GHz + 8,663,376,318 instructions # 2.07 insn per cycle + 1.562443081 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4243) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181999931112 +Relative difference = 9.857617164523888e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.150850e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.567788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.567788e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.124034 sec +INFO: 
No Floating Point Exceptions have been reported + 3,834,225,268 cycles # 1.801 GHz + 7,808,561,781 instructions # 2.04 insn per cycle + 2.129432875 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4424) (512y: 0) (512z: 2555) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183246739209 +Relative difference = 1.6003107281264138e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 2b639f2cd8..8244fa7021 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:13:30 +DATE: 2024-05-15_09:10:54 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.951513e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.082031e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.262905e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) GeV^0 -TOTAL : 0.315292 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.482985e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.705623e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.042225e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 
0.488772 sec INFO: No Floating Point Exceptions have been reported - 799,120,568 cycles:u # 2.363 GHz (73.58%) - 2,236,588 stalled-cycles-frontend:u # 0.28% frontend cycles idle (73.68%) - 4,188,322 stalled-cycles-backend:u # 0.52% backend cycles idle (73.82%) - 1,326,780,634 instructions:u # 1.66 insn per cycle - # 0.00 stalled cycles per insn (75.86%) - 0.364696263 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/runTest.exe + 2,008,491,626 cycles # 2.813 GHz + 2,851,254,095 instructions # 1.42 insn per cycle + 0.772618257 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.488171e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.610948e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.610948e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.150894 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.520451e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.617692e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.617692e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.248173 sec INFO: No Floating Point Exceptions have been reported - 10,852,739,826 cycles:u # 3.436 GHz (74.93%) - 7,674,451 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.92%) - 14,412,989 stalled-cycles-backend:u # 0.13% backend cycles idle (74.93%) - 35,477,674,159 instructions:u # 3.27 insn per cycle - # 0.00 stalled cycles per insn (74.94%) - 3.162235761 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 469) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe + 11,756,019,723 cycles # 2.765 GHz + 35,109,223,793 instructions # 2.99 insn per cycle + 4.253499726 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199088536203 -Relative difference = 4.4925808981097166e-08 +Avg ME (F77/C++) = 2.0288199094356969 +Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.908113e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.598784e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.598784e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 
3.404713e-03 ) GeV^0 -TOTAL : 1.469351 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.271716e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.745050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.745050e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.077721 sec INFO: No Floating Point Exceptions have been reported - 4,952,050,706 cycles:u # 3.353 GHz (74.99%) - 7,312,668 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.08%) - 1,212,184,247 stalled-cycles-backend:u # 24.48% backend cycles idle (75.08%) - 14,017,353,219 instructions:u # 2.83 insn per cycle - # 0.09 stalled cycles per insn (75.08%) - 1.480455473 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2487) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe + 5,957,421,378 cycles # 2.861 GHz + 14,471,556,150 instructions # 2.43 insn per cycle + 2.083048982 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198892958462 -Relative difference = 5.4565783974899003e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193583255634 +Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.907717e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.091306e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.091306e+06 ) sec^-1 
-MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.202600 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.214184e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.054314e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.054314e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.541457 sec INFO: No Floating Point Exceptions have been reported - 4,006,809,715 cycles:u # 3.311 GHz (74.88%) - 7,948,136 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.88%) - 1,400,349,482 stalled-cycles-backend:u # 34.95% backend cycles idle (74.90%) - 8,634,852,093 instructions:u # 2.16 insn per cycle - # 0.16 stalled cycles per insn (74.90%) - 1.213561260 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3422) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe + 4,148,997,635 cycles # 2.683 GHz + 8,874,807,692 instructions # 2.14 insn per cycle + 1.546990735 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3574) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186836987734 -Relative difference = 1.559041129563128e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182107033208 +Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.269929e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.124022e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.124022e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.529902 sec +INFO: No Floating Point Exceptions have 
been reported + 4,119,777,235 cycles # 2.685 GHz + 8,411,495,965 instructions # 2.04 insn per cycle + 1.535269339 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3319) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182107033208 +Relative difference = 1.0385521077446488e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.237727e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.663628e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.663628e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.090144 sec +INFO: 
No Floating Point Exceptions have been reported + 3,776,443,658 cycles # 1.803 GHz + 7,702,638,011 instructions # 2.04 insn per cycle + 2.095484503 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3440) (512y: 0) (512z: 2107) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183204829693 +Relative difference = 1.5796536184903122e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 4ba5b34172..79ad6a8d0a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_15:56:02 +DATE: 2024-05-15_08:54:30 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.872613e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.017576e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.073456e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.367222 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.192411e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181219e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276445e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 
0.523408 sec INFO: No Floating Point Exceptions have been reported - 881,192,883 cycles:u # 2.237 GHz (75.41%) - 2,223,709 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.69%) - 5,425,251 stalled-cycles-backend:u # 0.62% backend cycles idle (75.82%) - 1,433,173,477 instructions:u # 1.63 insn per cycle - # 0.00 stalled cycles per insn (75.82%) - 0.420951978 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest.exe + 2,152,861,783 cycles # 2.850 GHz + 3,076,817,120 instructions # 1.43 insn per cycle + 0.813114486 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243869 -Relative difference = 3.241686434838304e-07 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 
-EvtsPerSec[Rmb+ME] (23) = ( 2.471506e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.533081e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.533081e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.422100 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.038755e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.097734e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.097734e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.250907 sec INFO: No Floating Point Exceptions have been reported - 15,216,966,577 cycles:u # 3.432 GHz (74.97%) - 8,827,525 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.93%) - 878,631,864 stalled-cycles-backend:u # 5.77% backend cycles idle (74.92%) - 39,297,637,783 instructions:u # 2.58 insn per cycle - # 0.02 stalled cycles per insn (74.92%) - 4.437266712 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 740) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe + 15,281,663,127 cycles # 2.908 GHz + 38,577,348,657 instructions # 2.52 insn per cycle + 5.256051533 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.487635e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.709666e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.709666e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.516289 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 3.526101e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.725280e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.725280e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.081282 sec INFO: No Floating Point Exceptions have been reported - 8,519,765,655 cycles:u # 3.371 GHz (75.03%) - 8,772,954 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.00%) - 1,785,867,944 stalled-cycles-backend:u # 20.96% backend cycles idle (75.00%) - 24,078,135,749 instructions:u # 2.83 insn per cycle - # 0.07 stalled cycles per insn (75.00%) - 2.531160319 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2102) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe + 8,974,011,511 cycles # 2.909 GHz + 24,225,398,469 instructions # 2.70 insn per cycle + 3.086510757 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.837288e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.458636e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.458636e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.518184 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 5.577179e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.066553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.066553e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.989604 sec +INFO: No Floating Point Exceptions have been reported + 5,407,138,280 cycles # 2.712 GHz + 11,277,258,107 instructions # 2.09 insn per cycle + 1.994773312 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.339166e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.965304e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 6.965304e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.761508 sec INFO: No Floating Point Exceptions have been reported - 5,021,965,868 cycles:u # 3.283 GHz (74.90%) - 9,276,712 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.90%) - 1,434,668,332 stalled-cycles-backend:u # 28.57% backend cycles idle (74.92%) - 11,433,674,990 instructions:u # 2.28 insn per cycle - # 0.13 stalled cycles per insn (74.92%) - 1.533115849 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2467) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe + 4,844,335,136 cycles # 2.743 GHz + 10,524,652,969 instructions # 2.17 insn per cycle + 1.766651228 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063930599014 -Relative difference = 2.9916108265801754e-07 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.829545e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.049389e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.049389e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.845936 sec +INFO: No Floating Point Exceptions have been reported + 5,212,328,534 cycles 
# 1.829 GHz + 7,603,724,154 instructions # 1.46 insn per cycle + 2.851172084 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 7f1debf5d0..0e63a3f59f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_15:56:16 +DATE: 2024-05-15_08:54:53 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.630658e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.921972e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.978316e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.490325 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.193864e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.183193e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279310e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 
0.521792 sec INFO: No Floating Point Exceptions have been reported - 925,231,414 cycles:u # 2.333 GHz (73.12%) - 2,199,272 stalled-cycles-frontend:u # 0.24% frontend cycles idle (73.79%) - 6,955,777 stalled-cycles-backend:u # 0.75% backend cycles idle (75.58%) - 1,431,076,098 instructions:u # 1.55 insn per cycle - # 0.00 stalled cycles per insn (76.45%) - 0.587960415 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest.exe + 2,149,548,104 cycles # 2.853 GHz + 3,068,777,900 instructions # 1.43 insn per cycle + 0.810473258 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243869 -Relative difference = 3.241686434838304e-07 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 
-EvtsPerSec[Rmb+ME] (23) = ( 2.443865e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.503935e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.503935e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.471550 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.026547e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.084545e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.084545e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.282085 sec INFO: No Floating Point Exceptions have been reported - 15,422,902,750 cycles:u # 3.440 GHz (74.93%) - 9,272,160 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.97%) - 14,558,934 stalled-cycles-backend:u # 0.09% backend cycles idle (75.02%) - 40,045,851,231 instructions:u # 2.60 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 4.516805360 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe + 15,346,439,759 cycles # 2.903 GHz + 40,370,397,466 instructions # 2.63 insn per cycle + 5.287522626 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.546865e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.775676e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.775676e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.484891 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 3.722569e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.941157e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.941157e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.924371 sec INFO: No Floating Point Exceptions have been reported - 8,423,422,341 cycles:u # 3.374 GHz (75.02%) - 9,885,842 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.01%) - 487,638,502 stalled-cycles-backend:u # 5.79% backend cycles idle (75.01%) - 23,470,310,317 instructions:u # 2.79 insn per cycle - # 0.02 stalled cycles per insn (75.01%) - 2.500153281 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1993) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe + 8,527,497,888 cycles # 2.912 GHz + 23,255,313,092 instructions # 2.73 insn per cycle + 2.929888638 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.856680e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.326975e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.326975e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.710010 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 4.830249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.195018e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.195018e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.278721 sec +INFO: No Floating Point Exceptions have been reported + 6,249,543,045 cycles # 2.737 GHz + 12,962,507,292 instructions # 2.07 insn per cycle + 2.283921258 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.098042e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.497971e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 5.497971e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.164012 sec INFO: No Floating Point Exceptions have been reported - 5,696,119,318 cycles:u # 3.308 GHz (74.96%) - 10,526,837 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.91%) - 754,967,475 stalled-cycles-backend:u # 13.25% backend cycles idle (74.69%) - 13,094,009,159 instructions:u # 2.30 insn per cycle - # 0.06 stalled cycles per insn (74.70%) - 1.725421092 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2711) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe + 5,901,625,740 cycles # 2.721 GHz + 12,240,479,816 instructions # 2.07 insn per cycle + 2.169281960 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063930599014 -Relative difference = 2.9916108265801754e-07 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.529482e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.716465e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716465e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.079417 sec +INFO: No Floating Point Exceptions have been reported + 5,590,192,539 cycles 
# 1.813 GHz + 8,743,868,009 instructions # 1.56 insn per cycle + 3.084800754 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 567bc7e65f..12104a4304 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_15:56:30 +DATE: 2024-05-15_08:55:17 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.884650e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.028375e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.038050e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.405630 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.008388e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.051803e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.064529e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) 
GeV^-2 +TOTAL : 0.468501 sec INFO: No Floating Point Exceptions have been reported - 1,063,751,778 cycles:u # 2.576 GHz (74.35%) - 2,189,281 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.68%) - 6,058,350 stalled-cycles-backend:u # 0.57% backend cycles idle (74.88%) - 1,515,047,204 instructions:u # 1.42 insn per cycle - # 0.00 stalled cycles per insn (75.39%) - 0.452449879 seconds time elapsed + 1,972,372,372 cycles # 2.847 GHz + 2,839,654,276 instructions # 1.44 insn per cycle + 0.749356899 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.620723e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.845137e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.850867e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.627982 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.126797e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.326776e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.338237e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.602601 sec INFO: No Floating Point Exceptions have been reported - 1,715,002,123 cycles:u # 2.678 GHz (74.90%) - 2,164,287 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.04%) - 6,185,236 stalled-cycles-backend:u # 0.36% backend cycles idle (75.23%) - 2,009,201,156 instructions:u # 1.17 insn per cycle - # 0.00 stalled cycles per insn (75.18%) - 0.679343751 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest.exe + 2,374,378,503 cycles # 2.828 GHz + 3,620,992,812 instructions # 1.53 insn per cycle + 0.897814922 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418642 -Relative difference = 4.4692399933517674e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 
128 -EvtsPerSec[Rmb+ME] (23) = ( 2.946588e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.959084e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.959084e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.588844 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.422476e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.434911e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.434911e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.787770 sec INFO: No Floating Point Exceptions have been reported - 19,579,106,412 cycles:u # 3.501 GHz (74.97%) - 2,543,884 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) - 3,124,241,892 stalled-cycles-backend:u # 15.96% backend cycles idle (74.97%) - 57,877,052,779 instructions:u # 2.96 insn per cycle - # 0.05 stalled cycles per insn (75.00%) - 5.602333036 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1134) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe + 19,779,847,754 cycles # 2.913 GHz + 59,609,434,037 instructions # 3.01 insn per cycle + 6.791923869 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432431 -Relative difference = 4.4692302355460254e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.009169e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.060205e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.060205e+04 ) sec^-1 -MeanMatrixElemValue = ( 
4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.754225 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.626684e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.671405e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.671405e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.564782 sec INFO: No Floating Point Exceptions have been reported - 9,651,465,356 cycles:u # 3.499 GHz (74.96%) - 2,367,394 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.06%) - 2,659,536,123 stalled-cycles-backend:u # 27.56% backend cycles idle (75.06%) - 29,855,463,034 instructions:u # 3.09 insn per cycle - # 0.09 stalled cycles per insn (75.06%) - 2.791405308 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4742) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe + 10,370,410,191 cycles # 2.907 GHz + 30,676,006,440 instructions # 2.96 insn per cycle + 3.569040397 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.234641e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.256326e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.256326e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.354300 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 9.224141e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.397286e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.397286e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.799480 sec +INFO: No Floating Point Exceptions have been reported + 4,894,819,750 cycles # 2.715 GHz + 11,018,842,048 instructions # 2.25 insn per cycle + 1.803612811 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.032219e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.053358e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.053358e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.610689 sec INFO: No Floating Point Exceptions have been reported - 4,735,250,703 cycles:u # 3.487 GHz (74.72%) - 2,168,552 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.67%) - 1,564,370,787 stalled-cycles-backend:u # 33.04% backend cycles idle (74.88%) - 11,163,033,039 instructions:u # 2.36 insn per cycle - # 0.14 stalled cycles per insn (75.18%) - 1.361230827 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4396) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe + 4,375,444,673 cycles # 2.711 GHz + 10,295,955,997 instructions # 2.35 insn per cycle + 1.614893060 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.058331e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.161062e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.161062e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.347148 sec +INFO: No Floating Point Exceptions have been reported + 4,106,345,128 cycles # 1.747 GHz + 5,842,279,382 instructions # 1.42 insn per cycle + 
2.351375618 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index d11df66a82..aa42dcbcf3 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_16:24:53 +DATE: 2024-05-15_09:19:26 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.482644e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002478e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.002478e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.562760 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.556239e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.826539e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.826539e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.500480 sec INFO: No Floating Point Exceptions have been reported - 1,686,186,552 cycles:u # 2.877 GHz (73.70%) - 6,513,601 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.41%) - 260,016,996 stalled-cycles-backend:u # 15.42% backend cycles idle (75.49%) - 2,042,535,938 instructions:u # 1.21 insn per cycle - # 0.13 stalled cycles per insn (75.44%) - 0.609090374 seconds time elapsed + 2,010,386,751 cycles # 2.815 GHz + 3,060,121,362 instructions # 1.52 insn per cycle + 0.771215563 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.201563e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.677497e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.677497e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.948724e+03 +- 1.840727e+03 ) GeV^-2 -TOTAL : 1.249440 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.633980e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.588534e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.588534e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.837499 sec INFO: No Floating Point Exceptions have been reported - 3,843,824,840 cycles:u # 2.991 GHz (75.13%) - 16,951,489 stalled-cycles-frontend:u # 0.44% frontend cycles idle (75.11%) - 848,341,874 stalled-cycles-backend:u # 22.07% backend cycles idle (75.11%) - 3,944,599,588 instructions:u # 1.03 insn per cycle - # 0.22 stalled cycles per insn (75.18%) - 1.305562838 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest.exe + 3,090,776,663 cycles # 2.832 GHz + 4,980,733,695 instructions # 1.61 insn per cycle + 1.149431601 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418642 -Relative difference = 4.4692399933517674e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.964088e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.976611e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.976611e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.559066 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.381649e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.393658e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.393658e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.910820 sec INFO: No Floating Point Exceptions have been reported - 19,461,797,230 cycles:u # 3.499 GHz (74.98%) - 2,818,968 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) - 3,127,759,396 stalled-cycles-backend:u # 16.07% backend cycles idle (74.98%) - 57,912,281,008 instructions:u # 2.98 insn per cycle - # 0.05 stalled cycles per insn (74.96%) - 5.566514489 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1134) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe + 19,788,417,607 cycles # 2.862 GHz + 59,611,607,061 instructions # 3.01 insn per cycle + 6.915222633 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432431 -Relative difference = 4.4692302355460254e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.999739e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.050012e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.050012e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.762614 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.548645e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.593750e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.593750e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.634125 sec INFO: No Floating Point Exceptions have been reported - 9,669,003,072 cycles:u # 3.495 GHz (74.87%) - 2,418,973 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.84%) - 2,494,759,725 stalled-cycles-backend:u # 25.80% backend cycles idle (74.92%) - 29,924,636,825 instructions:u # 3.09 insn per cycle - # 0.08 stalled cycles per insn (75.06%) - 2.770246519 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4742) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe + 10,404,915,587 cycles # 2.860 GHz + 30,722,373,622 instructions # 2.95 insn per cycle + 3.638600833 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.232362e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.253478e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.253478e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.361317 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.002692e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.176059e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.176059e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.851700 sec INFO: No Floating Point Exceptions have been reported - 4,748,695,704 cycles:u # 3.479 GHz (74.81%) - 2,140,156 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.80%) - 1,563,158,568 stalled-cycles-backend:u # 32.92% backend cycles idle (74.80%) - 11,241,987,888 instructions:u # 2.37 insn per cycle - # 0.14 stalled cycles per insn (74.97%) - 1.368590983 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4396) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe + 4,942,815,311 cycles # 2.664 GHz + 11,068,193,942 instructions # 2.24 insn per cycle + 1.856145679 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.006966e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.028368e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.028368e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.658669 sec +INFO: No Floating Point Exceptions have been reported + 4,425,875,596 cycles # 2.662 GHz + 10,345,271,959 instructions # 2.34 insn per cycle + 1.663069368 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.826905e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.925862e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.925862e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.434578 sec +INFO: No Floating Point Exceptions have been reported + 4,152,358,413 cycles # 1.703 GHz + 5,882,259,715 instructions # 1.42 insn per cycle + 2.439137376 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 73d3d969fb..f0e031f62b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_15:56:45 +DATE: 2024-05-15_08:55:42 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.879539e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.012020e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.018976e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.387250 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.987492e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.046583e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059392e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) 
GeV^-2 +TOTAL : 0.470179 sec INFO: No Floating Point Exceptions have been reported - 1,077,578,787 cycles:u # 2.612 GHz (74.03%) - 2,237,257 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.68%) - 5,834,748 stalled-cycles-backend:u # 0.54% backend cycles idle (76.42%) - 1,512,857,332 instructions:u # 1.40 insn per cycle - # 0.00 stalled cycles per insn (75.68%) - 0.432689568 seconds time elapsed + 1,973,441,131 cycles # 2.857 GHz + 2,842,887,834 instructions # 1.44 insn per cycle + 0.747905498 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.607257e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.809635e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.814736e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.628290 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.118345e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.315309e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326550e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.598306 sec INFO: No Floating Point Exceptions have been reported - 1,711,877,158 cycles:u # 2.685 GHz (75.48%) - 2,204,155 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.02%) - 5,336,451 stalled-cycles-backend:u # 0.31% backend cycles idle (74.92%) - 2,025,677,427 instructions:u # 1.18 insn per cycle - # 0.00 stalled cycles per insn (75.16%) - 0.680661908 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/runTest.exe + 2,389,171,542 cycles # 2.862 GHz + 3,615,113,401 instructions # 1.51 insn per cycle + 0.893887406 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418642 -Relative difference = 4.4692399933517674e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 
128 -EvtsPerSec[Rmb+ME] (23) = ( 2.953489e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.965956e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.965956e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.575302 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.447561e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.460023e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.460023e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.718011 sec INFO: No Floating Point Exceptions have been reported - 19,552,368,610 cycles:u # 3.505 GHz (74.89%) - 2,660,305 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) - 3,658,274,650 stalled-cycles-backend:u # 18.71% backend cycles idle (75.05%) - 57,688,497,038 instructions:u # 2.95 insn per cycle - # 0.06 stalled cycles per insn (75.05%) - 5.582548277 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1087) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe + 19,539,796,204 cycles # 2.908 GHz + 58,797,374,199 instructions # 3.01 insn per cycle + 6.722231436 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432431 -Relative difference = 4.4692302355460254e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.007612e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.058203e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.058203e+04 ) sec^-1 -MeanMatrixElemValue = ( 
4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.754703 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.697416e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.742995e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.742995e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.510858 sec INFO: No Floating Point Exceptions have been reported - 9,650,646,311 cycles:u # 3.499 GHz (74.94%) - 2,305,687 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.06%) - 2,388,323,649 stalled-cycles-backend:u # 24.75% backend cycles idle (75.06%) - 30,244,173,100 instructions:u # 3.13 insn per cycle - # 0.08 stalled cycles per insn (75.06%) - 2.761973696 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4806) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe + 10,222,494,794 cycles # 2.909 GHz + 30,345,546,614 instructions # 2.97 insn per cycle + 3.515028540 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.217896e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.238537e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.238537e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.372470 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 8.916344e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.077035e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.077035e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.860593 sec +INFO: No Floating Point Exceptions have been reported + 5,052,529,312 cycles # 2.711 GHz + 11,483,367,724 instructions # 2.27 insn per cycle + 1.864806185 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.703300e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.891625e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 9.891625e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.711676 sec INFO: No Floating Point Exceptions have been reported - 4,794,762,813 cycles:u # 3.484 GHz (75.01%) - 2,118,382 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.00%) - 1,520,014,939 stalled-cycles-backend:u # 31.70% backend cycles idle (75.00%) - 11,609,689,557 instructions:u # 2.42 insn per cycle - # 0.13 stalled cycles per insn (75.00%) - 1.379533501 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4489) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe + 4,651,953,328 cycles # 2.712 GHz + 10,841,908,367 instructions # 2.33 insn per cycle + 1.715874244 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.026909e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.128215e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.128215e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.356894 sec +INFO: No Floating Point Exceptions have been reported + 4,119,891,537 cycles # 1.746 GHz + 6,106,228,072 instructions # 1.48 insn per cycle + 
2.361085439 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index e1a0e3c9a4..5c6c882772 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_15:57:01 +DATE: 2024-05-15_08:56:08 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.289518e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.830945e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.927784e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.415474e+04 +- 1.288238e+04 ) GeV^-2 -TOTAL : 0.322739 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.546352e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.304110e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.387480e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) 
GeV^-2 +TOTAL : 0.452126 sec INFO: No Floating Point Exceptions have been reported - 837,567,563 cycles:u # 2.417 GHz (73.98%) - 2,164,839 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.28%) - 5,659,093 stalled-cycles-backend:u # 0.68% backend cycles idle (74.69%) - 1,355,394,082 instructions:u # 1.62 insn per cycle - # 0.00 stalled cycles per insn (75.11%) - 0.368551297 seconds time elapsed + 1,893,414,828 cycles # 2.840 GHz + 2,700,627,762 instructions # 1.43 insn per cycle + 0.725425446 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.368084e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.629248e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.635221e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.619620e+05 +- 1.611328e+05 ) GeV^-2 -TOTAL : 0.435662 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.427477e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
4.467802e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.537028e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 +TOTAL : 0.495991 sec INFO: No Floating Point Exceptions have been reported - 1,147,824,263 cycles:u # 2.495 GHz (75.41%) - 2,016,822 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.37%) - 5,689,349 stalled-cycles-backend:u # 0.50% backend cycles idle (75.60%) - 1,589,336,679 instructions:u # 1.38 insn per cycle - # 0.00 stalled cycles per insn (75.69%) - 0.483291874 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest.exe + 2,097,978,612 cycles # 2.852 GHz + 3,016,663,704 instructions # 1.44 insn per cycle + 0.792464663 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 1.412404e+00 -Avg ME (F77/GPU) = 1.4131669530965212 -Relative difference = 0.0005401804983001964 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 1.412608e+00 +Avg ME (F77/GPU) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP 
threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.230883e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.246043e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.246043e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 5.096868 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.509920e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.522973e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.522973e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.550589 sec INFO: No Floating Point Exceptions have been reported - 17,874,399,162 cycles:u # 3.505 GHz (74.91%) - 2,364,613 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.94%) - 3,161,727,640 stalled-cycles-backend:u # 17.69% backend cycles idle (75.02%) - 55,161,219,476 instructions:u # 3.09 insn per cycle - # 0.06 stalled cycles per insn (75.06%) - 5.103969117 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1229) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe + 19,076,732,071 cycles # 2.911 GHz + 58,959,260,000 instructions # 3.09 insn per cycle + 6.554604317 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412998e+00 -Avg ME (F77/C++) = 1.4129978146120550 -Relative difference = 1.3120184529301602e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129858051842916 +Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.073966e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.090872e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.090872e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.550666 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.236135e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.381348e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.381348e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.010730 sec INFO: No Floating Point Exceptions have been reported - 5,423,702,647 cycles:u # 3.490 GHz (74.81%) - 2,003,040 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.78%) - 1,720,289,419 stalled-cycles-backend:u # 31.72% backend cycles idle (74.78%) - 16,115,970,116 instructions:u # 2.97 insn per cycle - # 0.11 stalled cycles per insn (74.98%) - 1.557468987 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5205) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe + 5,857,272,878 cycles # 2.908 GHz + 16,694,397,816 instructions # 2.85 insn per cycle + 2.014749871 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129857118325333 -Relative difference = 2.039421953066926e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.412987e+00 +Avg ME (F77/C++) = 1.4129865669244737 +Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.373466e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.454420e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.454420e+05 ) sec^-1 
-MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.715408 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.764087e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.827666e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.827666e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.950207 sec +INFO: No Floating Point Exceptions have been reported + 2,597,080,381 cycles # 2.723 GHz + 5,980,321,548 instructions # 2.30 insn per cycle + 0.954318451 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 1.955925e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.034043e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.034043e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.858903 sec INFO: No Floating Point Exceptions have been reported - 2,492,743,461 cycles:u # 3.468 GHz (74.42%) - 2,007,007 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.44%) - 808,406,617 stalled-cycles-backend:u # 32.43% backend cycles idle (74.94%) - 6,038,138,625 instructions:u # 2.42 insn per cycle - # 0.13 stalled cycles per insn (75.46%) - 0.722468412 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4878) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe + 2,340,800,245 cycles # 2.714 GHz + 5,601,926,177 instructions # 2.39 insn per cycle + 0.863041825 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162680784324 -Relative difference = 1.896804623606238e-07 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.446888e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.489350e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.489350e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.155852 sec +INFO: No Floating Point Exceptions have been reported + 
2,054,158,021 cycles # 1.772 GHz + 3,333,717,631 instructions # 1.62 insn per cycle + 1.159901647 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 30cd57abf1..43c5e65f2a 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_16:25:09 +DATE: 2024-05-15_09:19:52 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.247681e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.554030e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.554030e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.755516e+02 +- 2.671055e+02 ) GeV^-2 -TOTAL : 0.531278 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.708965e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.117187e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.117187e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 +TOTAL : 0.465757 sec INFO: No Floating Point Exceptions have been reported - 1,475,648,505 cycles:u # 2.845 GHz (72.90%) - 7,438,312 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.29%) - 262,612,701 stalled-cycles-backend:u # 17.80% backend cycles idle (75.30%) - 1,892,172,461 instructions:u # 1.28 insn per cycle - # 0.14 stalled cycles per insn (75.93%) - 0.576464032 seconds time elapsed + 1,910,294,983 cycles # 2.814 GHz + 2,837,512,357 instructions # 1.49 insn per cycle + 0.735479351 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.105687e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.477890e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.477890e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.855934e+03 +- 1.791981e+03 ) GeV^-2 -TOTAL : 1.064579 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.531814e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.569827e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.569827e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 +TOTAL : 0.650442 sec INFO: No Floating Point Exceptions have been reported - 3,242,332,593 cycles:u # 2.967 GHz (75.24%) - 17,743,806 stalled-cycles-frontend:u # 0.55% frontend cycles idle (75.40%) - 857,947,060 stalled-cycles-backend:u # 26.46% backend cycles idle (74.83%) - 3,425,579,084 instructions:u # 1.06 insn per cycle - # 0.25 stalled cycles per insn (74.75%) - 1.113753694 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest.exe + 2,527,978,336 cycles # 2.826 GHz + 3,864,064,048 instructions # 1.53 insn per cycle + 0.951839385 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 1.412404e+00 -Avg ME (F77/GPU) = 1.4131669530965212 -Relative difference = 0.0005401804983001964 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 1.412608e+00 +Avg ME (F77/GPU) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.226318e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.241649e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.241649e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 5.106439 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.456896e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.469931e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.469931e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.695714 sec INFO: No Floating Point Exceptions have been reported - 17,885,648,370 cycles:u # 3.500 GHz (74.95%) - 2,924,907 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.96%) - 3,189,031,686 stalled-cycles-backend:u # 17.83% backend cycles idle (75.02%) - 55,181,031,468 instructions:u # 3.09 insn per cycle - # 0.06 stalled cycles per insn (75.10%) - 5.384687058 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1229) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe + 19,187,788,903 cycles # 2.865 GHz + 58,964,541,904 instructions # 3.07 insn per cycle + 6.700042902 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412998e+00 -Avg ME (F77/C++) = 1.4129978146120550 -Relative difference = 1.3120184529301602e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129858051842916 +Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.072067e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.088913e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.088913e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.556450 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.096292e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.240749e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.240749e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.050191 sec INFO: No Floating Point Exceptions have been reported - 5,440,000,446 cycles:u # 3.487 GHz (74.89%) - 2,280,613 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.88%) - 1,707,699,385 stalled-cycles-backend:u # 31.39% backend cycles idle (74.88%) - 16,135,473,210 instructions:u # 2.97 insn per cycle - # 0.11 stalled cycles per insn (74.88%) - 1.563502548 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5205) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe + 5,878,600,751 cycles # 2.862 GHz + 16,741,770,713 instructions # 2.85 insn per cycle + 2.054466215 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129857118325333 -Relative difference = 2.039421953066926e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.412987e+00 +Avg ME (F77/C++) = 1.4129865669244737 +Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.369755e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.450486e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.450486e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.719054 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.635221e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.692353e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.692353e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 1.028531 sec INFO: No Floating Point Exceptions have been reported - 2,505,149,993 cycles:u # 3.467 GHz (74.65%) - 1,790,711 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.54%) - 809,007,986 stalled-cycles-backend:u # 32.29% backend cycles idle (74.54%) - 6,116,780,442 instructions:u # 2.44 insn per cycle - # 0.13 stalled cycles per insn (74.86%) - 0.726331866 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4878) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe + 2,753,199,998 cycles # 2.668 GHz + 6,017,134,022 instructions # 2.19 insn per cycle + 1.032815039 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162680784324 -Relative difference = 1.896804623606238e-07 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.913612e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.992304e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.992304e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.882693 sec +INFO: No Floating Point Exceptions have been reported + 2,367,924,107 cycles # 2.672 GHz + 5,639,124,107 instructions # 2.38 insn per cycle + 0.886915877 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.395393e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.436951e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.436951e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.202996 sec +INFO: No Floating Point Exceptions have been reported + 2,083,621,174 cycles # 1.727 GHz + 3,374,799,861 instructions # 1.62 insn per cycle + 1.207355873 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 1bd08114fb..f08ecd1d39 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_15:57:14 +DATE: 2024-05-15_08:56:28 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.210766e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.663281e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.764884e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.415474e+04 +- 1.288238e+04 ) GeV^-2 -TOTAL : 0.322105 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.567059e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.326056e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.411539e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) 
GeV^-2 +TOTAL : 0.451783 sec INFO: No Floating Point Exceptions have been reported - 840,750,027 cycles:u # 2.432 GHz (73.98%) - 2,158,503 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.63%) - 6,103,258 stalled-cycles-backend:u # 0.73% backend cycles idle (75.04%) - 1,324,693,507 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (75.71%) - 0.368242227 seconds time elapsed + 1,890,106,900 cycles # 2.839 GHz + 2,663,406,859 instructions # 1.41 insn per cycle + 0.724170807 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.424369e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.692214e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.697518e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.619620e+05 +- 1.611328e+05 ) GeV^-2 -TOTAL : 0.433909 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.396627e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
4.418487e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.484929e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 +TOTAL : 0.497663 sec INFO: No Floating Point Exceptions have been reported - 1,150,364,539 cycles:u # 2.502 GHz (75.49%) - 2,023,314 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.62%) - 4,483,319 stalled-cycles-backend:u # 0.39% backend cycles idle (76.09%) - 1,521,066,999 instructions:u # 1.32 insn per cycle - # 0.00 stalled cycles per insn (75.31%) - 0.483317914 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/runTest.exe + 2,058,055,387 cycles # 2.818 GHz + 3,003,578,703 instructions # 1.46 insn per cycle + 0.786932640 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 1.412404e+00 -Avg ME (F77/GPU) = 1.4131669531526541 -Relative difference = 0.0005401805380429868 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 1.412608e+00 +Avg ME (F77/GPU) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP 
threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.256543e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.271956e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.271956e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 5.056864 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.521953e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.535392e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.535392e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.519150 sec INFO: No Floating Point Exceptions have been reported - 17,712,188,734 cycles:u # 3.500 GHz (75.02%) - 2,582,868 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.02%) - 2,714,112,309 stalled-cycles-backend:u # 15.32% backend cycles idle (75.02%) - 54,909,074,386 instructions:u # 3.10 insn per cycle - # 0.05 stalled cycles per insn (75.02%) - 5.063913868 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1171) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe + 18,988,818,598 cycles # 2.912 GHz + 58,701,795,910 instructions # 3.09 insn per cycle + 6.523314910 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412998e+00 -Avg ME (F77/C++) = 1.4129978146120550 -Relative difference = 1.3120184529301602e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129858051842916 +Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.127879e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.146554e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.146554e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.477722 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.643168e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.800713e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.800713e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.916804 sec INFO: No Floating Point Exceptions have been reported - 5,183,480,838 cycles:u # 3.500 GHz (74.69%) - 1,952,096 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.95%) - 1,427,331,531 stalled-cycles-backend:u # 27.54% backend cycles idle (75.16%) - 16,162,972,824 instructions:u # 3.12 insn per cycle - # 0.09 stalled cycles per insn (75.16%) - 1.484737821 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5136) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest.exe + 5,589,492,084 cycles # 2.911 GHz + 16,510,117,368 instructions # 2.95 insn per cycle + 1.921033755 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5551) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129857712652836 -Relative difference = 1.618803841657786e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.412987e+00 +Avg ME (F77/C++) = 1.4129865669244737 +Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.125930e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.190715e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.190715e+05 ) sec^-1 
-MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.795572 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.535382e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.582428e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.582428e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 1.088293 sec +INFO: No Floating Point Exceptions have been reported + 2,973,830,855 cycles # 2.724 GHz + 6,634,211,735 instructions # 2.23 insn per cycle + 1.092429717 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5568) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 1.654844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.711079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.711079e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 1.011276 sec INFO: No Floating Point Exceptions have been reported - 2,763,774,994 cycles:u # 3.459 GHz (74.97%) - 2,203,313 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.97%) - 785,450,570 stalled-cycles-backend:u # 28.42% backend cycles idle (74.97%) - 6,662,264,762 instructions:u # 2.41 insn per cycle - # 0.12 stalled cycles per insn (74.97%) - 0.802444349 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5430) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest.exe + 2,757,667,641 cycles # 2.717 GHz + 6,254,951,975 instructions # 2.27 insn per cycle + 1.015690621 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5279) (512y: 25) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162680784324 -Relative difference = 1.896804623606238e-07 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.327836e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.363787e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.363787e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.257441 sec +INFO: No Floating Point Exceptions have been reported + 
2,228,619,894 cycles # 1.768 GHz + 3,698,392,498 instructions # 1.66 insn per cycle + 1.261580008 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 29) (512z: 3970) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 4627425d79..8a353a8e39 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_15:57:26 +DATE: 2024-05-15_08:56:49 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.887775e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.027262e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.036364e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.387780 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.931968e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.043555e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056151e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) 
GeV^-2 +TOTAL : 0.467354 sec INFO: No Floating Point Exceptions have been reported - 1,049,027,669 cycles:u # 2.546 GHz (73.95%) - 2,134,474 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.14%) - 5,844,380 stalled-cycles-backend:u # 0.56% backend cycles idle (74.94%) - 1,528,986,593 instructions:u # 1.46 insn per cycle - # 0.00 stalled cycles per insn (74.83%) - 0.432775843 seconds time elapsed + 1,981,280,626 cycles # 2.860 GHz + 2,847,085,390 instructions # 1.44 insn per cycle + 0.749733800 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.635615e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.843712e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.849442e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.609776 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.119168e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.316295e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.327683e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.603396 sec INFO: No Floating Point Exceptions have been reported - 1,721,119,508 cycles:u # 2.687 GHz (74.86%) - 2,110,336 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.04%) - 6,031,696 stalled-cycles-backend:u # 0.35% backend cycles idle (75.30%) - 2,015,973,548 instructions:u # 1.17 insn per cycle - # 0.00 stalled cycles per insn (75.20%) - 0.663204305 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/runTest.exe + 2,399,824,573 cycles # 2.857 GHz + 3,685,028,033 instructions # 1.54 insn per cycle + 0.898525901 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569483 -Relative difference = 4.4188898885662695e-07 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 
-EvtsPerSec[Rmb+ME] (23) = ( 2.871835e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.883506e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.883506e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.733120 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.390547e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.402443e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.402443e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.877879 sec INFO: No Floating Point Exceptions have been reported - 20,114,351,158 cycles:u # 3.506 GHz (74.93%) - 2,517,021 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 3,854,050,442 stalled-cycles-backend:u # 19.16% backend cycles idle (75.04%) - 59,072,951,526 instructions:u # 2.94 insn per cycle - # 0.07 stalled cycles per insn (75.04%) - 5.740328179 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1149) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe + 20,033,174,405 cycles # 2.911 GHz + 60,534,325,532 instructions # 3.02 insn per cycle + 6.882069019 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.087732e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.139458e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.139458e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.718917 sec +OMP threads / `nproc --all` = 1 / 
4 +EvtsPerSec[Rmb+ME] (23) = ( 4.728631e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.774861e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.774861e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.488457 sec INFO: No Floating Point Exceptions have been reported - 9,530,401,613 cycles:u # 3.501 GHz (74.97%) - 2,075,703 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.03%) - 2,354,560,620 stalled-cycles-backend:u # 24.71% backend cycles idle (75.03%) - 29,677,385,435 instructions:u # 3.11 insn per cycle - # 0.08 stalled cycles per insn (75.02%) - 2.726176913 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4873) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest.exe + 10,181,292,152 cycles # 2.916 GHz + 30,384,307,915 instructions # 2.98 insn per cycle + 3.492671905 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5280) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.251275e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.273000e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273000e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.336639 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 9.201354e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.375070e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.375070e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.803934 sec +INFO: No Floating Point Exceptions have been reported + 4,869,150,810 cycles # 2.694 GHz + 10,978,562,560 instructions # 2.25 insn per cycle + 1.808136290 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4624) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.054680e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.077442e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.077442e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.576690 sec INFO: No Floating Point Exceptions have been reported - 4,666,727,671 cycles:u # 3.482 GHz (74.93%) - 2,106,240 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.93%) - 1,554,204,116 stalled-cycles-backend:u # 33.30% backend cycles idle (74.93%) - 11,146,255,994 instructions:u # 2.39 insn per cycle - # 0.14 stalled cycles per insn (74.93%) - 1.343598832 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4581) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest.exe + 4,287,902,457 cycles # 2.713 GHz + 10,247,451,592 instructions # 2.39 insn per cycle + 1.581011258 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4280) (512y: 82) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.877704e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.973161e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.973161e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.407671 sec +INFO: No Floating Point Exceptions have been reported + 4,207,339,558 cycles # 1.745 GHz + 6,043,036,802 instructions # 1.44 insn per cycle + 
2.411911339 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2066) (512y: 117) (512z: 3540) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213786174055 +Relative difference = 4.3972324717191576e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 7a8ec93c4e..e88f85bc0c 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_15:57:42 +DATE: 2024-05-15_08:57:15 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.873499e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.008721e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.015293e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.387103 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.932348e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.042306e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.055103e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) 
GeV^-2 +TOTAL : 0.466469 sec INFO: No Floating Point Exceptions have been reported - 1,056,954,597 cycles:u # 2.564 GHz (74.08%) - 2,198,078 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.17%) - 5,105,476 stalled-cycles-backend:u # 0.48% backend cycles idle (75.00%) - 1,563,651,790 instructions:u # 1.48 insn per cycle - # 0.00 stalled cycles per insn (74.84%) - 0.432479829 seconds time elapsed + 1,971,792,438 cycles # 2.854 GHz + 2,835,098,064 instructions # 1.44 insn per cycle + 0.747519363 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.632889e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.835986e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.841678e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.605876 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.116570e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.312584e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.323780e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.596823 sec INFO: No Floating Point Exceptions have been reported - 1,702,422,059 cycles:u # 2.689 GHz (74.54%) - 2,184,475 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.76%) - 5,598,750 stalled-cycles-backend:u # 0.33% backend cycles idle (75.03%) - 2,004,802,493 instructions:u # 1.18 insn per cycle - # 0.00 stalled cycles per insn (75.83%) - 0.658456690 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/runTest.exe + 2,383,594,932 cycles # 2.859 GHz + 3,684,187,301 instructions # 1.55 insn per cycle + 0.891590073 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569483 -Relative difference = 4.4188898885662695e-07 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 
-EvtsPerSec[Rmb+ME] (23) = ( 2.906678e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.918594e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.918594e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.664488 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.413781e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.425854e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.425854e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.811710 sec INFO: No Floating Point Exceptions have been reported - 19,869,141,415 cycles:u # 3.506 GHz (74.95%) - 2,403,537 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.02%) - 3,922,827,934 stalled-cycles-backend:u # 19.74% backend cycles idle (75.02%) - 58,608,353,930 instructions:u # 2.95 insn per cycle - # 0.07 stalled cycles per insn (75.02%) - 5.671993758 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1026) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe + 19,857,309,753 cycles # 2.914 GHz + 59,935,036,773 instructions # 3.02 insn per cycle + 6.815878048 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.271939e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.327248e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.327248e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.639703 sec +OMP threads / `nproc --all` = 1 / 
4 +EvtsPerSec[Rmb+ME] (23) = ( 4.758386e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.805205e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.805205e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.466198 sec INFO: No Floating Point Exceptions have been reported - 9,236,580,182 cycles:u # 3.494 GHz (74.89%) - 2,244,953 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.88%) - 1,810,719,115 stalled-cycles-backend:u # 19.60% backend cycles idle (74.89%) - 30,142,890,449 instructions:u # 3.26 insn per cycle - # 0.06 stalled cycles per insn (75.01%) - 2.647015728 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4944) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest.exe + 10,083,849,462 cycles # 2.907 GHz + 30,099,290,738 instructions # 2.98 insn per cycle + 3.470474717 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5082) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.228737e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.249673e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.249673e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.360409 sec +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 8.978413e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.141940e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.141940e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.847805 sec +INFO: No Floating Point Exceptions have been reported + 5,017,288,589 cycles # 2.710 GHz + 11,482,665,006 instructions # 2.29 insn per cycle + 1.852007665 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4723) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.808178e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.000241e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.000241e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.694132 sec INFO: No Floating Point Exceptions have been reported - 4,751,043,404 cycles:u # 3.483 GHz (74.78%) - 2,034,803 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.78%) - 1,525,164,530 stalled-cycles-backend:u # 32.10% backend cycles idle (74.73%) - 11,644,210,563 instructions:u # 2.45 insn per cycle - # 0.13 stalled cycles per insn (75.02%) - 1.367338479 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4685) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest.exe + 4,593,132,694 cycles # 2.706 GHz + 10,809,915,136 instructions # 2.35 insn per cycle + 1.698316035 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4285) (512y: 234) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.855597e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.950807e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.950807e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.415375 sec +INFO: No Floating Point Exceptions have been reported + 4,225,925,982 cycles # 1.747 GHz + 6,273,431,165 instructions # 1.48 insn per cycle + 
2.419573682 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1961) (512y: 163) (512z: 3617) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213786174055 +Relative difference = 4.3972324717191576e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 8dd5bdd130..35f4b07c31 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_15:57:57 +DATE: 2024-05-15_08:57:40 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.352876e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.532511e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.533908e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.658527 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.453896e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.476957e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.479157e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 
2.368970e+00 ) GeV^-4 +TOTAL : 0.528972 sec INFO: No Floating Point Exceptions have been reported - 1,888,638,728 cycles:u # 2.889 GHz (75.48%) - 2,109,824 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.55%) - 6,355,443 stalled-cycles-backend:u # 0.34% backend cycles idle (75.55%) - 2,112,081,809 instructions:u # 1.12 insn per cycle - # 0.00 stalled cycles per insn (75.28%) - 0.704110953 seconds time elapsed + 2,207,774,651 cycles # 2.859 GHz + 3,437,899,945 instructions # 1.56 insn per cycle + 0.831248833 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.241230e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.243695e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.243758e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 7.697250 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.141343e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.168816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.169999e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.026793 sec INFO: No Floating Point Exceptions have been reported - 26,573,207,605 cycles:u # 3.439 GHz (74.91%) - 3,155,326 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.95%) - 6,300,281 stalled-cycles-backend:u # 0.02% backend cycles idle (75.05%) - 21,102,057,691 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 7.750974681 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest.exe + 9,518,294,264 cycles # 2.901 GHz + 21,429,637,627 instructions # 2.25 insn per cycle + 3.336351765 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.204102e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.204965e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.204965e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.452461 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.869535e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.870457e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.870457e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.782045 sec INFO: No Floating Point Exceptions have been reported - 26,115,820,847 cycles:u # 3.503 GHz (75.00%) - 31,665,677 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.00%) - 2,969,259,653 stalled-cycles-backend:u # 11.37% backend cycles idle (75.00%) - 81,648,884,549 instructions:u # 3.13 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 7.459873901 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe + 25,590,278,249 cycles # 2.913 GHz + 78,937,068,953 instructions # 3.08 insn per cycle + 8.786204437 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.040487e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.045133e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.045133e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.265295 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.594981e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.598272e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.598272e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.571469 sec INFO: No Floating Point Exceptions have been reported - 11,438,272,299 cycles:u # 3.499 GHz (75.01%) - 1,570,411 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.04%) - 1,338,708,154 stalled-cycles-backend:u # 11.70% backend cycles idle (75.04%) - 39,146,792,626 instructions:u # 3.42 insn per cycle - # 0.03 stalled cycles per insn (75.04%) - 3.272705628 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe + 12,870,732,355 cycles # 2.813 GHz + 39,279,748,127 instructions # 3.05 insn per cycle + 4.575784377 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.202724e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.205315e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.205315e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.375160 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.041516e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.057538e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.057538e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.048843 sec +INFO: No Floating Point Exceptions have been reported + 5,574,639,516 cycles # 2.716 GHz + 13,685,609,494 instructions # 2.45 insn per cycle + 2.053053177 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.168356e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
9.189435e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.189435e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.798126 sec INFO: No Floating Point Exceptions have been reported - 4,819,692,655 cycles:u # 3.496 GHz (74.94%) - 848,155 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.05%) - 532,717,972 stalled-cycles-backend:u # 11.05% backend cycles idle (75.05%) - 13,721,077,924 instructions:u # 2.85 insn per cycle - # 0.04 stalled cycles per insn (75.05%) - 1.383011608 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe + 4,886,886,725 cycles # 2.713 GHz + 12,340,823,521 instructions # 2.53 insn per cycle + 1.802269756 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.912225e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.924556e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.924556e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.382421 sec +INFO: No Floating Point Exceptions have 
been reported + 4,113,294,297 cycles # 1.724 GHz + 6,335,943,255 instructions # 1.54 insn per cycle + 2.386734974 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 720d8ad564..fb56deab3c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_16:25:33 +DATE: 2024-05-15_09:20:40 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.357053e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.499082e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.499082e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.652708 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.120517e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.462866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.462866e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.521719 sec INFO: No Floating Point Exceptions have been reported - 1,981,267,905 cycles:u # 2.936 GHz (74.61%) - 2,766,739 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.54%) - 43,257,031 stalled-cycles-backend:u # 2.18% backend cycles idle (74.53%) - 2,220,306,469 instructions:u # 1.12 insn per cycle - # 0.02 stalled cycles per insn (75.07%) - 0.699482547 seconds time elapsed + 2,119,788,652 cycles # 2.819 GHz + 3,351,368,851 instructions # 1.58 insn per cycle + 0.812195705 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.210119e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.244985e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.244985e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 8.530658 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.618604e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.118495e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.118495e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.329148 sec INFO: No Floating Point Exceptions have been reported - 29,318,532,360 cycles:u # 3.422 GHz (74.97%) - 22,948,494 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.97%) - 1,130,187,974 stalled-cycles-backend:u # 3.85% backend cycles idle (74.98%) - 23,588,060,043 instructions:u # 0.80 insn per cycle - # 0.05 stalled cycles per insn (75.00%) - 8.587592843 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest.exe + 10,272,634,162 cycles # 2.852 GHz + 22,078,504,888 instructions # 2.15 insn per cycle + 3.660227253 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.224704e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.225594e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.225594e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.386700 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.837905e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.838776e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.838776e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.937469 sec INFO: No Floating Point Exceptions have been reported - 25,885,268,757 cycles:u # 3.503 GHz (75.00%) - 8,436,170 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.00%) - 3,081,829,357 stalled-cycles-backend:u # 11.91% backend cycles idle (75.00%) - 81,646,491,395 instructions:u # 3.15 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 7.394511398 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe + 25,594,477,998 cycles # 2.863 GHz + 78,950,199,042 instructions # 3.08 insn per cycle + 8.941935349 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.041063e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.045600e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.045600e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.268311 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.503068e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.506337e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.506337e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.695365 sec INFO: No Floating Point Exceptions have been reported - 11,460,129,702 cycles:u # 3.503 GHz (74.93%) - 714,318 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.05%) - 1,383,473,387 stalled-cycles-backend:u # 12.07% backend cycles idle (75.06%) - 39,145,894,854 instructions:u # 3.42 insn per cycle - # 0.04 stalled cycles per insn (75.06%) - 3.275584699 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe + 12,898,085,999 cycles # 2.746 GHz + 39,295,739,926 instructions # 3.05 insn per cycle + 4.700102170 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.202223e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204890e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.204890e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.379340 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.875978e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.891827e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.891827e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.096243 sec INFO: No Floating Point Exceptions have been reported - 4,831,323,740 cycles:u # 3.493 GHz (74.85%) - 408,845 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.12%) - 531,131,302 stalled-cycles-backend:u # 10.99% backend cycles idle (75.13%) - 13,717,644,390 instructions:u # 2.84 insn per cycle - # 0.04 stalled cycles per insn (75.13%) - 1.386770607 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe + 5,586,723,161 cycles # 2.661 GHz + 13,697,374,494 instructions # 2.45 insn per cycle + 2.100722433 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.982842e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.004357e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.004357e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.839387 sec +INFO: No Floating Point Exceptions have been reported + 4,902,530,960 cycles # 2.660 GHz + 12,352,684,842 instructions # 2.52 insn per cycle + 1.844032539 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.719703e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.732034e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.732034e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.454573 sec +INFO: No Floating Point Exceptions have been reported + 4,128,287,661 cycles # 1.680 GHz + 6,347,060,627 instructions # 1.54 insn per cycle + 2.459158049 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 966cc59f5c..0d1c3bb946 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_16:31:58 +DATE: 2024-05-15_09:30:29 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.349266e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.531209e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.532381e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.485197e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.513349e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.516105e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.654130 sec +TOTAL : 0.515285 sec INFO: No 
Floating Point Exceptions have been reported - 1,991,618,871 cycles:u # 2.935 GHz (74.57%) - 2,424,041 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.41%) - 34,189,430 stalled-cycles-backend:u # 1.72% backend cycles idle (75.26%) - 2,163,613,736 instructions:u # 1.09 insn per cycle - # 0.02 stalled cycles per insn (75.35%) - 0.699136584 seconds time elapsed + 2,091,548,257 cycles # 2.809 GHz + 3,228,803,869 instructions # 1.54 insn per cycle + 0.805824094 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.238353e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.241268e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.241327e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.150668e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.184268e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.185674e+05 ) sec^-1 
MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 8.406295 sec +TOTAL : 3.132364 sec INFO: No Floating Point Exceptions have been reported - 28,943,119,889 cycles:u # 3.428 GHz (75.00%) - 11,675,494 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.99%) - 1,122,815,591 stalled-cycles-backend:u # 3.88% backend cycles idle (74.99%) - 22,685,902,737 instructions:u # 0.78 insn per cycle - # 0.05 stalled cycles per insn (74.98%) - 8.462672542 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest.exe + 9,662,847,578 cycles # 2.852 GHz + 21,406,248,753 instructions # 2.22 insn per cycle + 3.445338887 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] 
(23) = ( 2.227440e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.228327e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.228327e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.837499e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.838361e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.838361e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.374640 sec +TOTAL : 8.936739 sec INFO: No Floating Point Exceptions have been reported - 25,848,687,597 cycles:u # 3.504 GHz (74.95%) - 3,873,133 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.95%) - 3,016,465,510 stalled-cycles-backend:u # 11.67% backend cycles idle (74.98%) - 81,681,062,341 instructions:u # 3.16 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 7.378912231 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe + 25,597,377,366 cycles # 2.864 GHz + 78,941,252,238 instructions # 3.08 insn per cycle + 8.940922788 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.047807e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.052416e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.052416e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.515333e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.518537e+03 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.518537e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.260685 sec +TOTAL : 4.676416 sec INFO: No Floating Point Exceptions have been reported - 11,417,527,634 cycles:u # 3.499 GHz (75.00%) - 1,067,583 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) - 1,390,452,689 stalled-cycles-backend:u # 12.18% backend cycles idle (75.00%) - 39,151,712,810 instructions:u # 3.43 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 3.264755542 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe + 12,904,346,221 cycles # 2.758 GHz + 39,280,145,215 instructions # 3.04 insn per cycle + 4.680551685 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.196928e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.199480e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.199480e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.866398e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.881997e+03 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 7.881997e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.096164 sec +INFO: No Floating Point Exceptions have been reported + 5,577,736,611 cycles # 2.657 GHz + 13,686,218,579 instructions # 2.45 insn per cycle + 2.100327168 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.968356e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.988747e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.988747e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 
3.250467e-01 ) GeV^-4 -TOTAL : 1.381758 sec +TOTAL : 1.839965 sec INFO: No Floating Point Exceptions have been reported - 4,845,646,776 cycles:u # 3.500 GHz (74.76%) - 1,894,319 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.05%) - 551,051,710 stalled-cycles-backend:u # 11.37% backend cycles idle (75.16%) - 13,721,268,897 instructions:u # 2.83 insn per cycle - # 0.04 stalled cycles per insn (75.16%) - 1.385838273 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe + 4,891,342,706 cycles # 2.654 GHz + 12,339,210,101 instructions # 2.52 insn per cycle + 1.844117982 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.718515e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.730467e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.730467e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.452431 sec +INFO: No Floating Point 
Exceptions have been reported + 4,114,726,339 cycles # 1.676 GHz + 6,333,274,004 instructions # 1.54 insn per cycle + 2.456739984 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 64d0049a09..f055e3f9ce 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,195 +1,236 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand 
-Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_16:29:49 +DATE: 2024-05-15_09:24:58 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.440619e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.571586e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.572951e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.667982 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.174424e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.487943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.490424e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.518038 sec INFO: No Floating Point Exceptions have been reported - 1,967,980,387 cycles:u # 2.929 GHz (74.36%) - 2,862,408 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.36%) - 41,016,977 stalled-cycles-backend:u # 2.08% backend cycles idle (75.01%) - 2,153,418,255 instructions:u # 1.09 insn per cycle - # 0.02 stalled cycles per insn (75.12%) - 0.711595964 seconds time elapsed + 2,104,506,630 cycles # 2.820 GHz + 3,337,647,884 instructions # 1.59 insn per cycle + 0.807730228 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.210461e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.242411e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.242483e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 8.509495 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.697139e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.147776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.149144e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.231241 sec INFO: No Floating Point Exceptions have been reported - 29,333,761,340 cycles:u # 3.434 GHz (75.00%) - 23,002,873 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.99%) - 1,133,831,790 stalled-cycles-backend:u # 3.87% backend cycles idle (74.99%) - 23,642,738,991 instructions:u # 0.81 insn per cycle - # 0.05 stalled cycles per insn (75.00%) - 8.562224782 seconds time elapsed -------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest.exe + 9,935,469,738 cycles # 2.849 GHz + 22,841,719,102 instructions # 2.30 insn per cycle + 3.545516982 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.223091e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.223976e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.223976e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.388974 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.836224e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.837114e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.837114e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.941157 sec INFO: No Floating Point Exceptions have been reported - 25,907,464,121 cycles:u # 3.505 GHz (75.00%) - 8,486,511 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.00%) - 2,965,472,064 stalled-cycles-backend:u # 11.45% backend cycles idle (75.00%) - 81,629,784,811 instructions:u # 3.15 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 7.670126649 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe + 25,607,708,752 cycles # 2.863 GHz + 78,941,589,821 instructions # 3.08 insn per cycle + 8.945374108 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.044247e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.048783e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.048783e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.262983 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.474198e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.477295e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.477295e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.729705 sec INFO: No Floating Point Exceptions have been reported - 11,431,656,379 cycles:u # 3.500 GHz (75.00%) - 574,795 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.02%) - 1,405,469,497 stalled-cycles-backend:u # 12.29% backend cycles idle (75.02%) - 39,149,355,532 instructions:u # 3.42 insn per cycle - # 0.04 stalled cycles per insn (75.02%) - 3.267353644 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe + 12,895,914,246 cycles # 2.725 GHz + 39,280,577,356 instructions # 3.05 insn per cycle + 4.733927981 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.204809e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.207400e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.207400e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.372933 sec 
+OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.859092e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.874494e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.874494e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.096348 sec +INFO: No Floating Point Exceptions have been reported + 5,572,573,379 cycles # 2.654 GHz + 13,685,971,434 instructions # 2.46 insn per cycle + 2.100551652 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.971088e+03 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 8.991417e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.991417e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.838609 sec INFO: No Floating Point Exceptions have been reported - 4,797,791,497 cycles:u # 3.488 GHz (75.00%) - 341,385 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) - 532,304,869 stalled-cycles-backend:u # 11.09% backend cycles idle (75.00%) - 13,721,099,517 instructions:u # 2.86 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 1.377048855 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe + 4,886,906,640 cycles # 2.654 GHz + 12,340,946,515 instructions # 2.53 insn per cycle + 1.842847914 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.727851e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.739515e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.739515e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.447641 sec +INFO: No Floating Point 
Exceptions have been reported + 4,105,054,154 cycles # 1.675 GHz + 6,334,788,892 instructions # 1.54 insn per cycle + 2.451834789 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index a84f43f38d..596c0e139f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_15:58:24 +DATE: 2024-05-15_08:58:13 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.382317e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.439244e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.439750e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.513473 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.479340e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.502998e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.505330e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 
2.368970e+00 ) GeV^-4 +TOTAL : 0.530199 sec INFO: No Floating Point Exceptions have been reported - 1,502,112,076 cycles:u # 2.810 GHz (74.52%) - 2,253,278 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.12%) - 7,775,573 stalled-cycles-backend:u # 0.52% backend cycles idle (74.58%) - 1,836,916,559 instructions:u # 1.22 insn per cycle - # 0.00 stalled cycles per insn (74.65%) - 0.561281368 seconds time elapsed + 2,205,377,507 cycles # 2.861 GHz + 3,376,536,792 instructions # 1.53 insn per cycle + 0.831148875 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.738429e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.743464e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.743577e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.335273 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.151750e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.179384e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.180577e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.017740 sec INFO: No Floating Point Exceptions have been reported - 21,796,344,827 cycles:u # 3.425 GHz (74.90%) - 2,923,143 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) - 6,274,063 stalled-cycles-backend:u # 0.03% backend cycles idle (74.99%) - 17,459,408,904 instructions:u # 0.80 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 6.389762832 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/runTest.exe + 9,499,607,795 cycles # 2.896 GHz + 21,512,832,515 instructions # 2.26 insn per cycle + 3.335785909 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.214028e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.214909e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.214909e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.418926 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.875938e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.876826e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.876826e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.751856 sec INFO: No Floating Point Exceptions have been reported - 26,004,260,668 cycles:u # 3.504 GHz (75.00%) - 18,242,709 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.00%) - 2,944,961,444 stalled-cycles-backend:u # 11.32% backend cycles idle (75.00%) - 81,674,747,927 instructions:u # 3.14 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 7.426102928 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 6589) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe + 25,512,720,979 cycles # 2.914 GHz + 78,709,548,508 instructions # 3.09 insn per cycle + 8.755998767 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4264) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.061721e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.066293e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.066293e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.251420 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.496486e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.499607e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.499607e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.699409 sec INFO: No Floating Point Exceptions have been reported - 11,395,658,100 cycles:u # 3.501 GHz (74.98%) - 1,307,293 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.94%) - 1,479,855,615 stalled-cycles-backend:u # 12.99% backend cycles idle (74.93%) - 39,187,358,585 instructions:u # 3.44 insn per cycle - # 0.04 stalled cycles per insn (74.93%) - 3.258428830 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12771) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe + 12,966,578,114 cycles # 2.758 GHz + 39,226,351,463 instructions # 3.03 insn per cycle + 4.703633821 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:12951) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.194480e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.197028e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.197028e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.384380 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.959665e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.975475e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.975475e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.069543 sec +INFO: No Floating Point Exceptions have been reported + 5,619,385,674 cycles # 2.711 GHz + 13,800,122,512 instructions # 2.46 insn per cycle + 2.073748619 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.998297e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
9.018203e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.018203e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.831905 sec INFO: No Floating Point Exceptions have been reported - 4,850,262,020 cycles:u # 3.495 GHz (74.69%) - 444,362 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.68%) - 571,850,754 stalled-cycles-backend:u # 11.79% backend cycles idle (74.97%) - 13,733,484,308 instructions:u # 2.83 insn per cycle - # 0.04 stalled cycles per insn (75.22%) - 1.391537224 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11048) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe + 4,976,755,667 cycles # 2.711 GHz + 12,465,998,942 instructions # 2.50 insn per cycle + 1.836219257 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.913751e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.925860e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.925860e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.381526 sec +INFO: No Floating Point Exceptions have 
been reported + 4,118,252,785 cycles # 1.727 GHz + 6,458,362,100 instructions # 1.57 insn per cycle + 2.385687443 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index d21e013e19..4d6259fe05 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_16:13:41 +DATE: 2024-05-15_09:11:14 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.394078e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.555997e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.557360e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.635308 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.244117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.269243e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.271350e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 
2.368970e+00 ) GeV^-4 +TOTAL : 0.537946 sec INFO: No Floating Point Exceptions have been reported - 1,928,274,978 cycles:u # 2.952 GHz (74.32%) - 2,152,412 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.45%) - 5,643,942 stalled-cycles-backend:u # 0.29% backend cycles idle (75.53%) - 2,100,599,060 instructions:u # 1.09 insn per cycle - # 0.00 stalled cycles per insn (75.52%) - 0.680152805 seconds time elapsed + 2,167,156,159 cycles # 2.821 GHz + 3,392,966,659 instructions # 1.57 insn per cycle + 0.824971117 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.241824e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.244364e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.244427e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 7.691029 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.760050e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.786712e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.787882e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.315956 sec INFO: No Floating Point Exceptions have been reported - 26,521,594,349 cycles:u # 3.435 GHz (75.04%) - 3,163,261 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.03%) - 6,509,269 stalled-cycles-backend:u # 0.02% backend cycles idle (75.04%) - 21,161,721,500 instructions:u # 0.80 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 7.743039364 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/runTest.exe + 10,224,332,396 cycles # 2.854 GHz + 23,077,972,962 instructions # 2.26 insn per cycle + 3.638688936 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.587858e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.588234e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.588234e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 35.758530 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.122268e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.122707e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.122707e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 39.794899 sec INFO: No Floating Point Exceptions have been reported - 125,378,748,124 cycles:u # 3.506 GHz (74.97%) - 9,493,427 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) - 17,315,752,460 stalled-cycles-backend:u # 13.81% backend cycles idle (75.00%) - 140,999,688,570 instructions:u # 1.12 insn per cycle - # 0.12 stalled cycles per insn (75.01%) - 35.768742055 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:21092) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe + 113,687,314,934 cycles # 2.857 GHz + 144,823,823,271 instructions # 1.27 insn per cycle + 39.799159131 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:21353) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198140461E-004 -Relative difference = 2.8372991790910424e-07 +Avg ME (F77/C++) = 6.6266731198140450E-004 +Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.663594e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.665950e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.665950e+03 ) sec^-1 
-MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.488523 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.013716e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.016144e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.016144e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.450973 sec INFO: No Floating Point Exceptions have been reported - 15,741,680,138 cycles:u # 3.504 GHz (74.92%) - 1,724,770 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) - 7,822,970,699 stalled-cycles-backend:u # 49.70% backend cycles idle (75.07%) - 37,448,442,237 instructions:u # 2.38 insn per cycle - # 0.21 stalled cycles per insn (75.07%) - 4.496025758 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68052) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe + 14,763,659,201 cycles # 2.707 GHz + 37,576,144,793 instructions # 2.55 insn per cycle + 5.455412399 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:68119) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141220E-004 -Relative difference = 2.837299064562788e-07 +Avg ME (F77/C++) = 6.6266731198141209E-004 +Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.589511e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.600071e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.600071e+03 ) sec^-1 
-MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.172740 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.154145e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.167309e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.167309e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.301876 sec +INFO: No Floating Point Exceptions have been reported + 6,125,614,448 cycles # 2.657 GHz + 13,062,162,541 instructions # 2.13 insn per cycle + 2.306100470 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.664032e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.683149e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.683149e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.902577 sec INFO: No Floating Point Exceptions have been reported - 7,597,061,684 cycles:u # 3.491 GHz (75.01%) - 537,764 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) - 4,403,802,024 stalled-cycles-backend:u # 57.97% backend cycles idle (75.01%) - 12,901,611,122 instructions:u # 1.70 insn per cycle - # 0.34 stalled cycles per insn (75.01%) - 2.180024620 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:46593) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe + 5,063,734,571 cycles # 2.656 GHz + 11,440,604,174 instructions # 2.26 insn per cycle + 1.906973772 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156778E-004 -Relative difference = 2.837296716733571e-07 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.952458e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.964708e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.964708e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.368892 sec +INFO: No Floating Point Exceptions have 
been reported + 3,971,908,402 cycles # 1.674 GHz + 5,943,368,618 instructions # 1.50 insn per cycle + 2.373284376 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 5dce3f4a7c..c5d3a4f2ff 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_16:14:41 +DATE: 2024-05-15_09:12:21 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.370095e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.428040e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.428561e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.516294 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.275128e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.300396e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.302627e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 
2.368970e+00 ) GeV^-4 +TOTAL : 0.537226 sec INFO: No Floating Point Exceptions have been reported - 1,508,920,262 cycles:u # 2.817 GHz (74.13%) - 2,154,249 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.80%) - 5,821,644 stalled-cycles-backend:u # 0.39% backend cycles idle (74.68%) - 1,849,104,202 instructions:u # 1.23 insn per cycle - # 0.00 stalled cycles per insn (74.64%) - 0.560262555 seconds time elapsed + 2,163,917,663 cycles # 2.821 GHz + 3,355,130,894 instructions # 1.55 insn per cycle + 0.824217982 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.739330e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.744226e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.744350e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.429607 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.758771e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.785429e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.786568e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.307138 sec INFO: No Floating Point Exceptions have been reported - 21,778,736,572 cycles:u # 3.424 GHz (74.89%) - 2,923,981 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 6,184,764 stalled-cycles-backend:u # 0.03% backend cycles idle (74.98%) - 17,426,204,182 instructions:u # 0.80 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 6.484529429 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/runTest.exe + 10,162,507,375 cycles # 2.853 GHz + 22,962,240,536 instructions # 2.26 insn per cycle + 3.619532536 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.523620e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.523987e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.523987e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 36.266140 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.129690e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.130140e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.130140e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 39.722641 sec INFO: No Floating Point Exceptions have been reported - 127,158,371,335 cycles:u # 3.506 GHz (74.98%) - 75,046,009 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) - 18,252,299,466 stalled-cycles-backend:u # 14.35% backend cycles idle (75.01%) - 141,512,071,410 instructions:u # 1.11 insn per cycle - # 0.13 stalled cycles per insn (75.01%) - 36.273412907 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:21725) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe + 113,571,444,042 cycles # 2.859 GHz + 144,786,097,470 instructions # 1.27 insn per cycle + 39.726802831 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20719) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.673222e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.675673e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.675673e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.476264 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.953753e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.955997e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.955997e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.561657 sec INFO: No Floating Point Exceptions have been reported - 15,690,793,260 cycles:u # 3.503 GHz (75.00%) - 1,996,278 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) - 7,687,242,311 stalled-cycles-backend:u # 48.99% backend cycles idle (75.00%) - 37,504,259,650 instructions:u # 2.39 insn per cycle - # 0.20 stalled cycles per insn (75.00%) - 4.483480776 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68056) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe + 15,215,969,839 cycles # 2.735 GHz + 37,766,601,033 instructions # 2.48 insn per cycle + 5.566026750 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:68447) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141220E-004 -Relative difference = 2.837299064562788e-07 +Avg ME (F77/C++) = 6.6266731198141209E-004 +Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.744723e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.755449e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.755449e+03 ) sec^-1 
-MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.129140 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.301121e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.314990e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.314990e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.255685 sec +INFO: No Floating Point Exceptions have been reported + 5,997,807,839 cycles # 2.655 GHz + 12,896,119,219 instructions # 2.15 insn per cycle + 2.260006581 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.623672e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.642489e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.642489e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.911380 sec INFO: No Floating Point Exceptions have been reported - 7,449,523,167 cycles:u # 3.493 GHz (74.87%) - 392,094 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.87%) - 4,290,565,052 stalled-cycles-backend:u # 57.60% backend cycles idle (74.87%) - 12,820,412,529 instructions:u # 1.72 insn per cycle - # 0.33 stalled cycles per insn (74.96%) - 2.136397182 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45663) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe + 5,087,031,928 cycles # 2.656 GHz + 11,446,947,598 instructions # 2.25 insn per cycle + 1.915663527 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156778E-004 -Relative difference = 2.837296716733571e-07 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.996536e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.008825e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.008825e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.353561 sec +INFO: No Floating Point Exceptions have 
been reported + 3,949,690,846 cycles # 1.676 GHz + 5,897,063,378 instructions # 1.49 insn per cycle + 2.357781802 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index e345be8e70..cbfc88379e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_15:58:49 +DATE: 2024-05-15_08:58:46 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.538301e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.734010e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.735595e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535870e-02 +- 4.279978e-02 ) GeV^-4 -TOTAL : 0.422305 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.371311e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.416400e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.421725e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 
2.368053e+00 ) GeV^-4 +TOTAL : 0.485367 sec INFO: No Floating Point Exceptions have been reported - 1,168,629,497 cycles:u # 2.647 GHz (75.35%) - 2,066,166 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.13%) - 5,548,523 stalled-cycles-backend:u # 0.47% backend cycles idle (74.66%) - 1,562,808,277 instructions:u # 1.34 insn per cycle - # 0.00 stalled cycles per insn (75.00%) - 0.468337743 seconds time elapsed + 2,002,413,256 cycles # 2.854 GHz + 2,969,576,839 instructions # 1.48 insn per cycle + 0.758471447 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.698210e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.724197e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.724638e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 -TOTAL : 2.609834 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.620584e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 8.682469e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.685265e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.723049 sec INFO: No Floating Point Exceptions have been reported - 8,802,571,270 cycles:u # 3.338 GHz (74.86%) - 2,415,067 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.10%) - 4,820,027 stalled-cycles-backend:u # 0.05% backend cycles idle (75.11%) - 7,381,991,552 instructions:u # 0.84 insn per cycle - # 0.00 stalled cycles per insn (75.04%) - 2.659203909 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest.exe + 5,638,197,939 cycles # 2.887 GHz + 11,952,390,273 instructions # 2.12 insn per cycle + 2.011373445 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.467529e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.468586e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.468586e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.656468 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.939700e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.940642e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.940642e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.463553 sec INFO: No Floating Point Exceptions have been reported - 23,349,191,409 cycles:u # 3.506 GHz (74.98%) - 964,182 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.01%) - 3,020,129,414 stalled-cycles-backend:u # 12.93% backend cycles idle (75.01%) - 75,781,433,147 instructions:u # 3.25 insn per cycle - # 0.04 stalled cycles per insn (75.01%) - 6.664330081 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe + 24,602,533,365 cycles # 2.906 GHz + 78,128,390,385 instructions # 3.18 insn per cycle + 8.467657194 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866115424713E-004 -Relative difference = 5.861309557415831e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.932698e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.950490e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
9.950490e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.661008 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.990292e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.003122e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.003122e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.354252 sec INFO: No Floating Point Exceptions have been reported - 5,800,808,519 cycles:u # 3.486 GHz (75.01%) - 616,133 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 801,256,663 stalled-cycles-backend:u # 13.81% backend cycles idle (75.01%) - 20,042,368,585 instructions:u # 3.46 insn per cycle - # 0.04 stalled cycles per insn (75.01%) - 1.667945050 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe + 6,470,915,919 cycles # 2.744 GHz + 20,120,315,745 instructions # 3.11 insn per cycle + 2.358972692 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274845946848876E-004 -Relative difference = 6.115670001294808e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.364461e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.374777e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.374777e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.703461 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.598984e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.605530e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.605530e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.034095 sec INFO: No Floating Point Exceptions have been reported - 2,450,180,140 cycles:u # 3.467 GHz (75.12%) - 319,319 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.10%) - 252,566,161 stalled-cycles-backend:u # 10.31% backend cycles idle (75.10%) - 6,979,289,115 instructions:u # 2.85 insn per cycle - # 0.04 stalled cycles per insn (75.10%) - 0.710302678 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe + 2,819,923,035 cycles # 2.718 GHz + 6,988,048,681 instructions # 2.48 insn per cycle + 1.038318254 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271947045332125E-004 -Relative difference = 4.4583988847766445e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.815329e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.823687e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.823687e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.911531 sec +INFO: No Floating Point Exceptions have been reported + 2,491,445,921 cycles # 2.723 GHz + 6,295,438,291 instructions # 2.53 insn per cycle + 0.915618567 seconds time elapsed 
+=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.400172e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.405292e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.405292e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.179498 sec +INFO: No Floating Point Exceptions have been reported + 2,046,427,111 cycles # 1.730 GHz + 3,266,025,313 
instructions # 1.60 insn per cycle + 1.183581210 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 1a53d769b8..ed8784593a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_16:26:00 +DATE: 2024-05-15_09:21:13 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.529555e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.696847e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.696847e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.202335e-01 +- 3.251521e-01 ) GeV^-4 -TOTAL : 0.438870 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.592056e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.298774e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.298774e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.475781 sec INFO: No Floating Point Exceptions have been reported - 1,286,892,816 cycles:u # 2.789 GHz (73.86%) - 2,836,732 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.06%) - 33,819,914 stalled-cycles-backend:u # 2.63% backend cycles idle (74.60%) - 1,635,906,811 instructions:u # 1.27 insn per cycle - # 0.02 stalled cycles per insn (75.68%) - 0.483134647 seconds time elapsed + 1,939,527,028 cycles # 2.815 GHz + 2,902,437,962 instructions # 1.50 insn per cycle + 0.747004510 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.260775e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.701838e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.701838e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.213799e+02 +- 1.195366e+02 ) GeV^-4 -TOTAL : 3.442850 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.214912e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.514571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.514571e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 +TOTAL : 1.907786 sec INFO: No Floating Point Exceptions have been reported - 11,602,142,051 cycles:u # 3.342 GHz (74.91%) - 22,272,146 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.97%) - 1,166,093,542 stalled-cycles-backend:u # 10.05% backend cycles idle (75.11%) - 9,908,851,401 instructions:u # 0.85 insn per cycle - # 0.12 stalled cycles per insn (75.10%) - 3.493619221 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest.exe + 6,122,056,386 cycles # 2.846 GHz + 12,557,649,687 instructions # 2.05 insn per cycle + 2.207056622 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.465573e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.466633e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.466633e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.663414 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.906950e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.907876e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907876e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.611285 sec INFO: No Floating Point Exceptions have been reported - 23,375,582,467 cycles:u # 3.506 GHz (74.92%) - 1,998,077 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) - 3,094,020,929 stalled-cycles-backend:u # 13.24% backend cycles idle (75.03%) - 75,778,856,835 instructions:u # 3.24 insn per cycle - # 0.04 stalled cycles per insn (75.04%) - 6.670436994 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe + 24,664,256,092 cycles # 2.863 GHz + 78,132,918,841 instructions # 3.17 insn per cycle + 8.615492497 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866115424713E-004 -Relative difference = 5.861309557415831e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.820071e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.838035e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.838035e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.681875 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.867477e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.879890e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.879890e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.399085 sec INFO: No Floating Point Exceptions have been reported - 5,818,447,218 cycles:u # 3.461 GHz (74.87%) - 631,971 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.84%) - 818,517,046 stalled-cycles-backend:u # 14.07% backend cycles idle (74.88%) - 20,110,770,654 instructions:u # 3.46 insn per cycle - # 0.04 stalled cycles per insn (74.90%) - 1.688879505 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe + 6,481,045,907 cycles # 2.698 GHz + 20,129,947,819 instructions # 3.11 insn per cycle + 2.403366026 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274845946848876E-004 -Relative difference = 6.115670001294808e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.375608e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.385909e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.385909e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.702501 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.563122e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.569724e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.569724e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.060074 sec INFO: No Floating Point Exceptions have been reported - 2,454,393,783 cycles:u # 3.476 GHz (74.97%) - 239,024 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.08%) - 246,569,377 stalled-cycles-backend:u # 10.05% backend cycles idle (75.07%) - 6,976,891,637 instructions:u # 2.84 insn per cycle - # 0.04 stalled cycles per insn (75.07%) - 0.709898530 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe + 2,825,592,732 cycles # 2.656 GHz + 6,997,999,144 instructions # 2.48 insn per cycle + 1.064449020 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271947045332125E-004 -Relative difference = 4.4583988847766445e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.771659e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.779871e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.779871e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.936726 sec +INFO: No Floating Point Exceptions have been reported + 2,499,492,257 cycles # 2.658 GHz + 6,305,238,570 instructions # 2.52 insn per cycle + 0.941141246 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.360130e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.365020e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.365020e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.216987 sec +INFO: No Floating Point Exceptions have been reported + 2,054,846,027 cycles # 1.684 GHz + 3,277,226,537 instructions # 1.59 insn per cycle + 1.221302296 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 7fcde53999..3255b1deef 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_16:32:25 +DATE: 2024-05-15_09:31:02 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.525311e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.726404e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.728003e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.202247e-01 +- 3.251485e-01 ) GeV^-4 -TOTAL : 0.439368 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.323506e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.373675e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.379424e+05 ) sec^-1 +MeanMatrixElemValue = ( 
4.159397e-01 +- 3.238804e-01 ) GeV^-4 +TOTAL : 0.472243 sec INFO: No Floating Point Exceptions have been reported - 1,270,344,241 cycles:u # 2.734 GHz (73.63%) - 2,448,524 stalled-cycles-frontend:u # 0.19% frontend cycles idle (73.94%) - 34,050,399 stalled-cycles-backend:u # 2.68% backend cycles idle (74.86%) - 1,609,817,107 instructions:u # 1.27 insn per cycle - # 0.02 stalled cycles per insn (75.69%) - 0.486682472 seconds time elapsed + 1,926,906,626 cycles # 2.811 GHz + 2,842,186,724 instructions # 1.47 insn per cycle + 0.743931745 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.686205e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.717014e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.717447e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.213664e+02 +- 1.195366e+02 ) GeV^-4 -TOTAL : 3.305003 sec +EvtsPerSec[Rmb+ME] (23) 
= ( 8.619196e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.692028e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.695553e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 +TOTAL : 1.807131 sec INFO: No Floating Point Exceptions have been reported - 11,105,900,651 cycles:u # 3.329 GHz (75.02%) - 11,040,635 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.07%) - 1,144,196,478 stalled-cycles-backend:u # 10.30% backend cycles idle (75.07%) - 9,026,212,245 instructions:u # 0.81 insn per cycle - # 0.13 stalled cycles per insn (75.02%) - 3.358176383 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest.exe + 5,799,524,002 cycles # 2.848 GHz + 12,052,890,185 instructions # 2.08 insn per cycle + 2.092585249 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` 
= 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.469495e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.470544e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.470544e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.651185 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.909596e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910513e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910513e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 8.597641 sec INFO: No Floating Point Exceptions have been reported - 23,314,047,464 cycles:u # 3.504 GHz (74.99%) - 1,023,835 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) - 2,989,172,128 stalled-cycles-backend:u # 12.82% backend cycles idle (74.99%) - 75,802,850,533 instructions:u # 3.25 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 6.655153632 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe + 24,643,205,050 cycles # 2.865 GHz + 78,127,351,887 instructions # 3.17 insn per cycle + 8.601700557 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866115424713E-004 -Relative difference = 5.861309557415831e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.890088e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.907950e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.907950e+03 ) sec^-1 -MeanMatrixElemValue = ( 
4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.668323 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.967025e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.979707e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.979707e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 2.362883 sec INFO: No Floating Point Exceptions have been reported - 5,857,650,027 cycles:u # 3.506 GHz (74.75%) - 2,083,109 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.98%) - 822,777,492 stalled-cycles-backend:u # 14.05% backend cycles idle (75.10%) - 20,043,885,758 instructions:u # 3.42 insn per cycle - # 0.04 stalled cycles per insn (75.10%) - 1.672316378 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe + 6,478,067,487 cycles # 2.738 GHz + 20,118,736,323 instructions # 3.11 insn per cycle + 2.366901273 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274845946848876E-004 -Relative difference = 6.115670001294808e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.373497e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.383649e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.383649e+04 ) sec^-1 -MeanMatrixElemValue = ( 
4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.700820 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.562128e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.568498e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.568498e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 1.059093 sec INFO: No Floating Point Exceptions have been reported - 2,440,546,902 cycles:u # 3.470 GHz (74.98%) - 205,681 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) - 257,963,467 stalled-cycles-backend:u # 10.57% backend cycles idle (74.98%) - 6,981,870,931 instructions:u # 2.86 insn per cycle - # 0.04 stalled cycles per insn (74.98%) - 0.704600207 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe + 2,820,901,565 cycles # 2.655 GHz + 6,985,719,621 instructions # 2.48 insn per cycle + 1.063184068 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271947045332125E-004 -Relative difference = 4.4583988847766445e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.768476e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.776638e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.776638e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.937260 sec +INFO: No Floating Point Exceptions have been reported + 2,496,445,075 cycles # 2.654 GHz + 6,293,783,461 instructions # 2.52 insn per cycle + 0.941399878 seconds time 
elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.360121e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.365073e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.365073e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 1.215676 sec +INFO: No Floating Point Exceptions have been reported + 2,051,427,144 cycles # 1.683 GHz + 
3,264,452,978 instructions # 1.59 insn per cycle + 1.219734902 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 2f6c028d0e..84a1accbff 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,195 +1,236 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_16:30:18 +DATE: 2024-05-15_09:25:31 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.562827e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.726981e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.727629e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.202335e-01 +- 3.251521e-01 ) GeV^-4 -TOTAL : 0.432096 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.733155e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.384139e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.389936e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.471997 sec INFO: No Floating Point Exceptions have been reported - 1,239,375,288 cycles:u # 2.720 GHz (75.36%) - 2,457,751 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.44%) - 51,259,680 stalled-cycles-backend:u # 4.14% backend cycles idle (75.44%) - 1,635,658,699 instructions:u # 1.32 insn per cycle - # 0.03 stalled cycles per insn (75.48%) - 0.471477263 seconds time elapsed + 1,949,349,946 cycles # 2.815 GHz + 2,883,916,104 instructions # 1.48 insn per cycle + 0.749239635 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.287375e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.712743e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.713274e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.213799e+02 +- 1.195366e+02 ) GeV^-4 -TOTAL : 3.424943 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.466972e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.686495e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.689920e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 +TOTAL : 1.837386 sec INFO: No Floating Point Exceptions have been reported - 11,507,485,887 cycles:u # 3.351 GHz (74.89%) - 21,524,913 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.07%) - 1,134,644,407 stalled-cycles-backend:u # 9.86% backend cycles idle (75.09%) - 9,786,678,640 instructions:u # 0.85 insn per cycle - # 0.12 stalled cycles per insn (75.08%) - 3.469627658 seconds time elapsed -------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest.exe + 5,922,319,867 cycles # 2.847 GHz + 12,727,018,620 instructions # 2.15 insn per cycle + 2.135925623 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= 
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.461528e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.462582e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.462582e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.672502 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.908017e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.908934e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.908934e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.603676 sec INFO: No Floating Point Exceptions have been reported - 23,348,014,194 cycles:u # 3.499 GHz (74.96%) - 1,432,309 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.95%) - 3,047,985,824 stalled-cycles-backend:u # 13.05% backend cycles idle (74.96%) - 75,854,593,993 instructions:u # 3.25 insn per cycle - # 0.04 stalled cycles per insn (74.97%) - 6.676447910 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe + 24,642,724,373 cycles # 2.863 GHz + 78,128,946,887 instructions # 3.17 insn per cycle + 8.607823851 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866115424713E-004 -Relative difference = 5.861309557415831e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.878591e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.896526e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.896526e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.670139 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.881721e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.894028e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.894028e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.391103 sec INFO: No Floating Point Exceptions have been reported - 5,859,735,192 cycles:u # 3.503 GHz (74.68%) - 357,232 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.90%) - 817,209,696 stalled-cycles-backend:u # 13.95% backend cycles idle (75.13%) - 20,050,584,379 instructions:u # 3.42 insn per cycle - # 0.04 stalled cycles per insn (75.13%) - 1.674097325 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe + 6,467,875,915 cycles # 2.701 GHz + 20,120,565,733 instructions # 3.11 insn per cycle + 2.395185690 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274845946848876E-004 -Relative difference = 6.115670001294808e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.374346e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.384755e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.384755e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.700499 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.557471e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.563801e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.563801e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.061218 sec INFO: No Floating Point Exceptions have been reported - 2,440,629,579 cycles:u # 3.471 GHz (74.97%) - 229,723 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) - 247,928,603 stalled-cycles-backend:u # 10.16% backend cycles idle (74.97%) - 6,980,386,979 instructions:u # 2.86 insn per cycle - # 0.04 stalled cycles per insn (74.97%) - 0.704321246 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe + 2,818,223,714 cycles # 2.647 GHz + 6,988,155,543 instructions # 2.48 insn per cycle + 1.065311424 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271947045332125E-004 -Relative difference = 4.4583988847766445e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.764482e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.772453e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.772453e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.937612 sec +INFO: No Floating Point Exceptions have been reported + 2,491,490,656 cycles # 2.647 GHz + 6,295,363,549 instructions # 2.53 insn per cycle + 0.941754882 seconds time 
elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.361210e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.365914e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.365914e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.213135 sec +INFO: No Floating Point Exceptions have been reported + 2,046,512,114 cycles # 1.682 GHz + 
3,265,943,047 instructions # 1.60 insn per cycle + 1.217215733 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 3efaa8b447..e9cd0fab8d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_15:59:07 +DATE: 2024-05-15_08:59:11 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.534350e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.718064e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.719919e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535870e-02 +- 4.279978e-02 ) GeV^-4 -TOTAL : 0.422348 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.366106e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.412879e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.418387e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 
2.368053e+00 ) GeV^-4 +TOTAL : 0.484833 sec INFO: No Floating Point Exceptions have been reported - 1,182,384,413 cycles:u # 2.672 GHz (74.19%) - 2,177,985 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.55%) - 5,597,910 stalled-cycles-backend:u # 0.47% backend cycles idle (74.72%) - 1,592,050,914 instructions:u # 1.35 insn per cycle - # 0.00 stalled cycles per insn (75.07%) - 0.468920316 seconds time elapsed + 2,009,324,153 cycles # 2.858 GHz + 2,980,982,469 instructions # 1.48 insn per cycle + 0.761343116 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.708072e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.741034e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.741481e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 -TOTAL : 2.631260 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.612323e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 8.672623e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.675391e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.716642 sec INFO: No Floating Point Exceptions have been reported - 8,744,418,917 cycles:u # 3.322 GHz (75.08%) - 2,411,124 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.11%) - 4,627,952 stalled-cycles-backend:u # 0.05% backend cycles idle (75.12%) - 7,473,196,726 instructions:u # 0.85 insn per cycle - # 0.00 stalled cycles per insn (75.09%) - 2.679032858 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/runTest.exe + 5,662,740,437 cycles # 2.893 GHz + 11,353,972,089 instructions # 2.01 insn per cycle + 2.014594393 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.471911e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.472964e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.472964e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.644527 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.952072e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.953022e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953022e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.410163 sec INFO: No Floating Point Exceptions have been reported - 23,292,200,546 cycles:u # 3.504 GHz (74.97%) - 941,401 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.97%) - 2,793,996,180 stalled-cycles-backend:u # 12.00% backend cycles idle (74.97%) - 75,764,862,603 instructions:u # 3.25 insn per cycle - # 0.04 stalled cycles per insn (74.97%) - 6.651612901 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3848) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe + 24,530,090,353 cycles # 2.916 GHz + 77,854,592,626 instructions # 3.17 insn per cycle + 8.414276248 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3114) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866108667618E-004 -Relative difference = 5.871505118544242e-08 +Avg ME (F77/C++) = 6.6274866268634797E-004 +Relative difference = 5.630135835748959e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.941464e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.959132e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.959132e+03 ) sec^-1 
-MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.659366 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.109913e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.123055e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.123055e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.314562 sec INFO: No Floating Point Exceptions have been reported - 5,807,726,656 cycles:u # 3.493 GHz (75.03%) - 691,045 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) - 927,786,568 stalled-cycles-backend:u # 15.98% backend cycles idle (74.98%) - 20,036,844,991 instructions:u # 3.45 insn per cycle - # 0.05 stalled cycles per insn (74.98%) - 1.666336859 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13231) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe + 6,419,654,507 cycles # 2.769 GHz + 20,085,518,289 instructions # 3.13 insn per cycle + 2.318756911 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274845946848876E-004 -Relative difference = 6.115670001294808e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861465384638E-004 +Relative difference = 2.211071647257023e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.385038e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.395436e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.395436e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.697217 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.526248e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.532430e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.532430e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.082391 sec INFO: No Floating Point Exceptions have been reported - 2,432,905,799 cycles:u # 3.473 GHz (74.99%) - 224,108 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.88%) - 295,138,287 stalled-cycles-backend:u # 12.13% backend cycles idle (74.88%) - 6,977,300,556 instructions:u # 2.87 insn per cycle - # 0.04 stalled cycles per insn (74.88%) - 0.704182951 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11587) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe + 2,916,919,637 cycles # 2.686 GHz + 7,129,894,032 instructions # 2.44 insn per cycle + 1.086400028 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271947045332125E-004 -Relative difference = 4.4583988847766445e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271939668077068E-004 +Relative difference = 5.008498817890231e-09 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.724869e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.732671e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.732671e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.958871 sec +INFO: No Floating Point Exceptions have been reported + 2,599,161,158 cycles # 2.701 GHz + 6,439,212,709 instructions # 2.48 insn per cycle + 0.962903376 seconds time elapsed 
+=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271939668077068E-004 +Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.354018e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.358675e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.358675e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.219384 sec +INFO: No Floating Point Exceptions have been reported + 2,118,745,707 cycles # 1.733 GHz + 3,427,593,513 
instructions # 1.62 insn per cycle + 1.223495203 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2924) (512y: 22) (512z: 9654) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952032322112E-004 +Relative difference = 3.066639970473621e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 71b4455a14..4458ee928f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_16:15:40 +DATE: 2024-05-15_09:13:29 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.563101e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.729671e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.731133e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535870e-02 +- 4.279978e-02 ) GeV^-4 -TOTAL : 0.421043 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.547623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.587705e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.592904e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 
2.368053e+00 ) GeV^-4 +TOTAL : 0.494801 sec INFO: No Floating Point Exceptions have been reported - 1,178,996,820 cycles:u # 2.673 GHz (74.34%) - 2,271,227 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.50%) - 5,966,417 stalled-cycles-backend:u # 0.51% backend cycles idle (74.63%) - 1,576,303,170 instructions:u # 1.34 insn per cycle - # 0.00 stalled cycles per insn (74.92%) - 0.467983265 seconds time elapsed + 2,071,828,549 cycles # 2.818 GHz + 3,075,474,522 instructions # 1.48 insn per cycle + 0.793077205 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.698669e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.723715e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.724146e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 -TOTAL : 2.608747 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.752855e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 7.811492e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.814175e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.863794 sec INFO: No Floating Point Exceptions have been reported - 8,761,412,499 cycles:u # 3.326 GHz (75.10%) - 2,390,369 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.08%) - 4,812,855 stalled-cycles-backend:u # 0.05% backend cycles idle (75.02%) - 7,478,919,047 instructions:u # 0.85 insn per cycle - # 0.00 stalled cycles per insn (75.00%) - 2.658405479 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/runTest.exe + 5,990,387,426 cycles # 2.848 GHz + 12,901,532,368 instructions # 2.15 insn per cycle + 2.159413441 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262660579844562E-004 +Relative difference = 2.836238137986709e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.224731e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.225405e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.225405e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204931e-01 +- 3.252405e-01 ) GeV^-4 -TOTAL : 26.356894 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.440520e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.441267e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.441267e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 30.153753 sec INFO: No Floating Point Exceptions have been reported - 92,378,281,750 cycles:u # 3.505 GHz (74.99%) - 527,283,671 stalled-cycles-frontend:u # 0.57% frontend cycles idle (74.99%) - 6,727,526,078 stalled-cycles-backend:u # 7.28% backend cycles idle (74.99%) - 133,727,657,284 instructions:u # 1.45 insn per cycle - # 0.05 stalled cycles per insn (74.99%) - 26.364122211 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:16437) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe + 86,241,694,269 cycles # 2.860 GHz + 135,579,841,827 instructions # 1.57 insn per cycle + 30.157843376 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15593) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627534e-04 -Avg ME (F77/C++) = 6.6275342244977858E-004 -Relative difference = 3.387350194093721e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627535e-04 +Avg ME (F77/C++) = 6.6275351196781740E-004 +Relative difference = 1.805772034719401e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 8.428594e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.441253e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
8.441253e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 1.955855 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.670546e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.682391e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.682391e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.467623 sec INFO: No Floating Point Exceptions have been reported - 6,866,879,302 cycles:u # 3.505 GHz (74.79%) - 366,236 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.85%) - 3,373,512,128 stalled-cycles-backend:u # 49.13% backend cycles idle (75.04%) - 19,109,724,267 instructions:u # 2.78 insn per cycle - # 0.18 stalled cycles per insn (75.09%) - 1.962929375 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68898) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe + 6,770,215,775 cycles # 2.740 GHz + 19,386,112,198 instructions # 2.86 insn per cycle + 2.471854770 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:69681) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274857053714997E-004 -Relative difference = 4.445554471174176e-08 +Avg ME (F77/C++) = 6.6274862707273868E-004 +Relative difference = 4.0849182767952624e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.480723e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.484775e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.484775e+04 ) sec^-1 
-MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 1.117590 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.382738e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.387638e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.387638e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 1.194394 sec +INFO: No Floating Point Exceptions have been reported + 3,181,944,591 cycles # 2.656 GHz + 6,807,632,796 instructions # 2.14 insn per cycle + 1.198610229 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731558747466E-004 +Relative difference = 2.3520194007978538e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.675256e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.682394e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.682394e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 0.987158 sec INFO: No Floating Point Exceptions have been reported - 3,895,917,613 cycles:u # 3.476 GHz (75.02%) - 406,967 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.02%) - 2,245,604,879 stalled-cycles-backend:u # 57.64% backend cycles idle (75.02%) - 6,710,083,492 instructions:u # 1.72 insn per cycle - # 0.33 stalled cycles per insn (75.02%) - 1.124488383 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:48625) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe + 2,637,251,724 cycles # 2.662 GHz + 5,986,082,953 instructions # 2.27 insn per cycle + 0.991313558 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627274e-04 -Avg ME (F77/C++) = 6.6272735722101156E-004 -Relative difference = 6.454990161554483e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731558747466E-004 +Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.337403e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.342008e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.342008e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.234823 sec +INFO: 
No Floating Point Exceptions have been reported + 2,079,754,188 cycles # 1.680 GHz + 3,501,095,607 instructions # 1.68 insn per cycle + 1.239081052 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5210) (512y: 3) (512z:44829) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627275e-04 +Avg ME (F77/C++) = 6.6272750363879224E-004 +Relative difference = 5.490631193034436e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index e4136e56b7..fb291f957e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_16:16:21 +DATE: 2024-05-15_09:14:18 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.555444e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.735249e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.735948e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535870e-02 +- 4.279978e-02 ) GeV^-4 -TOTAL : 0.438925 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.501123e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.540237e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.545384e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 
2.368053e+00 ) GeV^-4 +TOTAL : 0.496418 sec INFO: No Floating Point Exceptions have been reported - 1,192,597,442 cycles:u # 2.710 GHz (74.12%) - 2,199,726 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.58%) - 5,840,015 stalled-cycles-backend:u # 0.49% backend cycles idle (75.09%) - 1,542,600,689 instructions:u # 1.29 insn per cycle - # 0.00 stalled cycles per insn (76.18%) - 0.483953210 seconds time elapsed + 2,075,811,223 cycles # 2.818 GHz + 3,117,901,600 instructions # 1.50 insn per cycle + 0.794898428 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.708918e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.735812e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.736260e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 -TOTAL : 2.675956 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.655063e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 7.712805e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.715432e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.882952 sec INFO: No Floating Point Exceptions have been reported - 8,744,995,176 cycles:u # 3.323 GHz (75.04%) - 2,329,939 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.07%) - 4,549,200 stalled-cycles-backend:u # 0.05% backend cycles idle (75.03%) - 7,448,478,363 instructions:u # 0.85 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 2.723248557 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/runTest.exe + 6,029,571,719 cycles # 2.845 GHz + 13,064,379,561 instructions # 2.17 insn per cycle + 2.178422753 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262660579844562E-004 +Relative difference = 2.836238137986709e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.356777e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.357473e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.357473e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204931e-01 +- 3.252404e-01 ) GeV^-4 -TOTAL : 25.809750 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.411371e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.412111e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.412111e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 30.314976 sec INFO: No Floating Point Exceptions have been reported - 90,470,344,269 cycles:u # 3.505 GHz (74.99%) - 173,834,865 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.99%) - 6,717,529,314 stalled-cycles-backend:u # 7.43% backend cycles idle (74.99%) - 134,197,137,791 instructions:u # 1.48 insn per cycle - # 0.05 stalled cycles per insn (74.99%) - 25.816836967 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:16752) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe + 86,626,378,196 cycles # 2.858 GHz + 135,996,101,788 instructions # 1.57 insn per cycle + 30.319079477 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15571) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627534e-04 -Avg ME (F77/C++) = 6.6275342811702997E-004 -Relative difference = 4.242457295829522e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627535e-04 +Avg ME (F77/C++) = 6.6275348988418387E-004 +Relative difference = 1.5263316105958472e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 8.233502e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.245586e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 8.245586e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 2.001883 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.540707e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.551820e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.551820e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.515575 sec INFO: No Floating Point Exceptions have been reported - 7,007,440,529 cycles:u # 3.495 GHz (74.98%) - 1,787,441 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.88%) - 3,067,895,945 stalled-cycles-backend:u # 43.78% backend cycles idle (74.87%) - 19,195,171,512 instructions:u # 2.74 insn per cycle - # 0.16 stalled cycles per insn (74.87%) - 2.008917088 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68882) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe + 6,857,543,798 cycles # 2.723 GHz + 19,438,697,676 instructions # 2.83 insn per cycle + 2.519792052 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:69723) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274857044990032E-004 -Relative difference = 4.4587192899226015e-08 +Avg ME (F77/C++) = 6.6274862764021530E-004 +Relative difference = 4.170542995014107e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.499144e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.503166e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.503166e+04 ) sec^-1 
-MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 1.103765 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.404571e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.409616e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.409616e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 1.175922 sec +INFO: No Floating Point Exceptions have been reported + 3,127,737,539 cycles # 2.651 GHz + 6,718,846,853 instructions # 2.15 insn per cycle + 1.180394712 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731651051409E-004 +Relative difference = 2.4912983202981302e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.670600e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.677947e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.677947e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 0.989790 sec INFO: No Floating Point Exceptions have been reported - 3,857,870,137 cycles:u # 3.485 GHz (74.76%) - 570,661 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.71%) - 2,192,082,521 stalled-cycles-backend:u # 56.82% backend cycles idle (74.71%) - 6,681,566,942 instructions:u # 1.73 insn per cycle - # 0.33 stalled cycles per insn (74.90%) - 1.110730332 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:47416) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe + 2,639,877,826 cycles # 2.658 GHz + 5,969,675,763 instructions # 2.26 insn per cycle + 0.994097973 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 6.627274e-04 -Avg ME (F77/C++) = 6.6272735755491807E-004 -Relative difference = 6.404606472340801e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731651051409E-004 +Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.337480e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.342044e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.342044e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.234650 sec +INFO: 
No Floating Point Exceptions have been reported + 2,078,705,261 cycles # 1.679 GHz + 3,494,560,286 instructions # 1.68 insn per cycle + 1.238808756 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4174) (512y: 4) (512z:44472) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.627275e-04 +Avg ME (F77/C++) = 6.6272750384530066E-004 +Relative difference = 5.80223501432476e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index efb7e8f517..871f3f0674 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_15:59:24 +DATE: 2024-05-15_08:59:36 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.406054e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.555668e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.557044e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.634300 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.448055e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.471732e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.473882e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 
2.368970e+00 ) GeV^-4 +TOTAL : 0.528517 sec INFO: No Floating Point Exceptions have been reported - 1,913,340,364 cycles:u # 2.926 GHz (74.47%) - 2,124,225 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.44%) - 6,045,559 stalled-cycles-backend:u # 0.32% backend cycles idle (75.55%) - 2,113,671,614 instructions:u # 1.10 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 0.680569111 seconds time elapsed + 2,206,186,016 cycles # 2.854 GHz + 3,406,499,343 instructions # 1.54 insn per cycle + 0.832615871 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.243150e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.245621e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.245684e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 7.684762 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.121788e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.148994e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.150173e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.032259 sec INFO: No Floating Point Exceptions have been reported - 26,524,644,220 cycles:u # 3.438 GHz (75.00%) - 2,990,750 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 5,314,402 stalled-cycles-backend:u # 0.02% backend cycles idle (75.01%) - 21,105,750,957 instructions:u # 0.80 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 7.739734597 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/runTest.exe + 9,483,811,613 cycles # 2.887 GHz + 21,728,837,964 instructions # 2.29 insn per cycle + 3.343241654 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.175512e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.176365e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.176365e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.550207 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.842230e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.843084e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.843084e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.911947 sec INFO: No Floating Point Exceptions have been reported - 26,485,092,444 cycles:u # 3.506 GHz (74.98%) - 25,690,494 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.01%) - 3,653,274,032 stalled-cycles-backend:u # 13.79% backend cycles idle (75.01%) - 82,369,927,258 instructions:u # 3.11 insn per cycle - # 0.04 stalled cycles per insn (75.01%) - 7.557542229 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 6623) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe + 25,916,297,719 cycles # 2.907 GHz + 79,437,751,444 instructions # 3.07 insn per cycle + 8.916043515 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4858) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.093761e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.098498e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.098498e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.231168 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.498308e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.501464e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.501464e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.697616 sec INFO: No Floating Point Exceptions have been reported - 11,330,244,281 cycles:u # 3.503 GHz (74.96%) - 3,524,069 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.02%) - 1,430,983,502 stalled-cycles-backend:u # 12.63% backend cycles idle (75.02%) - 38,463,632,777 instructions:u # 3.39 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 3.238452285 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12755) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe + 12,720,201,550 cycles # 2.706 GHz + 38,549,576,662 instructions # 3.03 insn per cycle + 4.701883730 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13163) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.222556e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.225223e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.225223e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.352936 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.987897e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.003874e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.003874e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.063104 sec +INFO: No Floating Point Exceptions have been reported + 5,528,470,581 cycles # 2.677 GHz + 13,482,584,347 instructions # 2.44 insn per cycle + 2.067345486 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.262373e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
9.284301e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.284301e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.780175 sec INFO: No Floating Point Exceptions have been reported - 4,737,672,929 cycles:u # 3.493 GHz (74.70%) - 588,360 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.66%) - 463,826,293 stalled-cycles-backend:u # 9.79% backend cycles idle (74.92%) - 13,523,757,879 instructions:u # 2.85 insn per cycle - # 0.03 stalled cycles per insn (75.20%) - 1.360047826 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10944) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe + 4,830,159,700 cycles # 2.708 GHz + 12,135,848,860 instructions # 2.51 insn per cycle + 1.784339719 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276836E-004 -Relative difference = 2.9563428359824236e-07 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.855143e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.867012e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.867012e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.402299 sec +INFO: No Floating Point Exceptions have 
been reported + 4,146,104,419 cycles # 1.724 GHz + 6,336,145,765 instructions # 1.53 insn per cycle + 2.406581725 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1803) (512y: 93) (512z: 9358) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index c13c1962d8..b1fc786479 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_15:59:51 +DATE: 2024-05-15_09:00:09 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.391539e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.447684e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.448332e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.514066 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.479154e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.502080e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.504549e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 
2.368970e+00 ) GeV^-4 +TOTAL : 0.526739 sec INFO: No Floating Point Exceptions have been reported - 1,514,719,274 cycles:u # 2.832 GHz (73.48%) - 2,184,743 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.85%) - 6,269,032 stalled-cycles-backend:u # 0.41% backend cycles idle (74.87%) - 1,874,596,872 instructions:u # 1.24 insn per cycle - # 0.00 stalled cycles per insn (74.60%) - 0.560203306 seconds time elapsed + 2,200,293,747 cycles # 2.851 GHz + 3,346,973,298 instructions # 1.52 insn per cycle + 0.832044919 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.736537e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.741332e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.741454e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.340456 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.140062e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.167552e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168734e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.020713 sec INFO: No Floating Point Exceptions have been reported - 21,777,848,510 cycles:u # 3.418 GHz (75.05%) - 2,846,897 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.06%) - 5,785,834 stalled-cycles-backend:u # 0.03% backend cycles idle (75.02%) - 17,480,234,133 instructions:u # 0.80 insn per cycle - # 0.00 stalled cycles per insn (74.99%) - 6.395018977 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/runTest.exe + 9,435,366,326 cycles # 2.881 GHz + 21,605,435,243 instructions # 2.29 insn per cycle + 3.330954832 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.206402e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.207273e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.207273e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.444427 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.833316e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.834177e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.834177e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.956408 sec INFO: No Floating Point Exceptions have been reported - 26,080,803,671 cycles:u # 3.502 GHz (74.97%) - 8,879,835 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.97%) - 3,448,291,666 stalled-cycles-backend:u # 13.22% backend cycles idle (74.97%) - 82,322,663,207 instructions:u # 3.16 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 7.452187120 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 6491) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe + 25,877,692,564 cycles # 2.889 GHz + 79,450,699,359 instructions # 3.07 insn per cycle + 8.960811501 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4505) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.116013e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.120719e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.120719e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.217094 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.503582e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.506712e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.506712e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.690551 sec INFO: No Floating Point Exceptions have been reported - 11,270,729,218 cycles:u # 3.500 GHz (74.93%) - 3,423,653 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.91%) - 1,343,947,151 stalled-cycles-backend:u # 11.92% backend cycles idle (74.91%) - 38,545,738,505 instructions:u # 3.42 insn per cycle - # 0.03 stalled cycles per insn (74.93%) - 3.224245404 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12729) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe + 12,673,056,790 cycles # 2.700 GHz + 38,520,866,041 instructions # 3.04 insn per cycle + 4.694808939 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:12930) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.221203e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.223851e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.223851e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.354247 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.015244e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.031118e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.031118e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.055513 sec +INFO: No Floating Point Exceptions have been reported + 5,573,540,691 cycles # 2.707 GHz + 13,605,499,057 instructions # 2.44 insn per cycle + 2.059674853 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.105038e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
9.125763e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.125763e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.810578 sec INFO: No Floating Point Exceptions have been reported - 4,741,335,297 cycles:u # 3.492 GHz (74.68%) - 405,247 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.73%) - 523,135,036 stalled-cycles-backend:u # 11.03% backend cycles idle (75.02%) - 13,528,084,025 instructions:u # 2.85 insn per cycle - # 0.04 stalled cycles per insn (75.26%) - 1.361943677 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10926) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe + 4,912,374,631 cycles # 2.708 GHz + 12,272,252,618 instructions # 2.50 insn per cycle + 1.814800349 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276836E-004 -Relative difference = 2.9563428359824236e-07 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.831320e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.842743e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.842743e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.410208 sec +INFO: No Floating Point Exceptions have 
been reported + 4,149,223,707 cycles # 1.719 GHz + 6,443,281,470 instructions # 1.55 insn per cycle + 2.414332469 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1628) (512y: 191) (512z: 9356) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index c9f0b40010..5b64325983 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_16:00:37 +DATE: 2024-05-15_09:02:00 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.980820e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.986296e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.986398e+01 ) sec^-1 -MeanMatrixElemValue = ( 3.297256e-04 +- 2.011325e-04 ) GeV^-6 -TOTAL : 9.456571 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.068667e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.069058e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.069231e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 
+- 9.825549e-06 ) GeV^-6 +TOTAL : 2.432023 sec INFO: No Floating Point Exceptions have been reported - 32,758,675,987 cycles:u # 3.456 GHz (75.00%) - 3,432,440 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) - 7,654,967 stalled-cycles-backend:u # 0.02% backend cycles idle (75.00%) - 25,905,703,431 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 9.506575067 seconds time elapsed + 7,962,061,333 cycles # 2.891 GHz + 17,842,918,220 instructions # 2.24 insn per cycle + 2.812445119 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.514663e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.517911e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.517935e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 -TOTAL : 9.036392 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.282838e+03 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 9.284842e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.285132e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.997752 sec INFO: No Floating Point Exceptions have been reported - 31,311,548,433 cycles:u # 3.457 GHz (74.97%) - 3,533,955 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.04%) - 7,913,429 stalled-cycles-backend:u # 0.03% backend cycles idle (75.03%) - 24,752,693,903 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.00%) - 9.082002133 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/runTest.exe + 12,544,904,873 cycles # 2.901 GHz + 27,763,638,781 instructions # 2.21 insn per cycle + 4.380155266 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406710E-003 -Relative difference = 3.516477760164775e-07 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.027435e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.027462e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.027462e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.144605 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.874055e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.874298e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.874298e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.708004 sec INFO: No Floating Point Exceptions have been reported - 18,028,402,409 cycles:u # 3.502 GHz (74.95%) - 27,611,368 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.98%) - 2,245,790,882 stalled-cycles-backend:u # 12.46% backend cycles idle (74.98%) - 55,127,058,667 instructions:u # 3.06 insn per cycle - # 0.04 stalled cycles per insn (74.98%) - 5.151610353 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:44874) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe + 18,789,307,142 cycles # 2.800 GHz + 53,916,081,627 instructions # 2.87 insn per cycle + 6.711948866 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.223400e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.223527e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.223527e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.380373 sec 
+OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.563882e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.563969e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.563969e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.380877 sec INFO: No Floating Point Exceptions have been reported - 8,333,152,705 cycles:u # 3.496 GHz (74.86%) - 2,559,647 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.83%) - 808,877,867 stalled-cycles-backend:u # 9.71% backend cycles idle (74.84%) - 27,042,969,545 instructions:u # 3.25 insn per cycle - # 0.03 stalled cycles per insn (75.01%) - 2.387686992 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:97234) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe + 9,819,609,469 cycles # 2.902 GHz + 27,092,420,480 instructions # 2.76 insn per cycle + 3.384888858 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.140048e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.140715e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.140715e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.032474 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.396404e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.396834e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.396834e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.559910 sec +INFO: No Floating Point Exceptions have been reported + 4,214,781,215 cycles # 2.696 GHz + 9,560,689,751 instructions # 2.27 insn per cycle + 1.563893957 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.840945e+02 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.841459e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.841459e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.379348 sec INFO: No Floating Point Exceptions have been reported - 3,618,339,129 cycles:u # 3.494 GHz (74.63%) - 1,807,361 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.02%) - 325,325,784 stalled-cycles-backend:u # 8.99% backend cycles idle (75.29%) - 9,521,952,296 instructions:u # 2.63 insn per cycle - # 0.03 stalled cycles per insn (75.29%) - 1.039807225 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84279) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe + 3,753,155,368 cycles # 2.714 GHz + 8,484,753,328 instructions # 2.26 insn per cycle + 1.383331847 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.355106e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.355613e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.355613e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.579328 sec +INFO: No Floating Point Exceptions have been reported + 2,715,728,873 cycles # 1.716 GHz + 4,273,895,295 instructions # 
1.57 insn per cycle + 1.583286672 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 93fc2cd34c..0da0f4b152 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_16:26:19 +DATE: 2024-05-15_09:21:39 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.930121e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.930842e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.930842e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 9.502413 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.066985e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.067950e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.067950e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.381273 sec INFO: No Floating Point Exceptions have been reported - 32,946,992,049 cycles:u # 3.459 GHz (74.96%) - 3,785,031 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 9,038,690 stalled-cycles-backend:u # 0.03% backend cycles idle (74.99%) - 26,035,342,716 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 9.549240792 seconds time elapsed + 7,706,027,501 cycles # 2.851 GHz + 16,286,663,405 instructions # 2.11 insn per cycle + 2.759816734 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.507144e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.510882e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.510882e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.221264e+00 +- 1.219329e+00 ) GeV^-6 -TOTAL : 9.080724 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.231814e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.267464e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.267464e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.986121 sec INFO: No Floating Point Exceptions have been reported - 31,494,745,424 cycles:u # 3.459 GHz (74.97%) - 4,919,660 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.01%) - 49,642,487 stalled-cycles-backend:u # 0.16% backend cycles idle (75.03%) - 24,874,270,522 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 9.126106525 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/runTest.exe + 12,303,392,195 cycles # 2.856 GHz + 29,197,517,966 instructions # 2.37 insn per cycle + 4.366638096 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406710E-003 -Relative difference = 3.516477760164775e-07 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.023132e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.023159e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.023159e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.166347 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.356883e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.357084e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.357084e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.181391 sec INFO: No Floating Point Exceptions have been reported - 18,095,619,775 cycles:u # 3.501 GHz (74.93%) - 27,836,214 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.93%) - 1,988,371,558 stalled-cycles-backend:u # 10.99% backend cycles idle (74.95%) - 55,156,193,382 instructions:u # 3.05 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 5.172999600 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:44874) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe + 18,963,162,368 cycles # 2.640 GHz + 53,918,714,635 instructions # 2.84 insn per cycle + 7.185419358 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.235258e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.235385e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.235385e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.368254 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.532810e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.532897e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.532897e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.449678 sec INFO: No Floating Point Exceptions have been reported - 8,294,074,370 cycles:u # 3.498 GHz (74.99%) - 1,791,455 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.04%) - 769,410,399 stalled-cycles-backend:u # 9.28% backend cycles idle (75.04%) - 26,992,585,217 instructions:u # 3.25 insn per cycle - # 0.03 stalled cycles per insn (75.04%) - 2.374877299 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:97234) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe + 9,860,760,253 cycles # 2.856 GHz + 27,094,035,576 instructions # 2.75 insn per cycle + 3.453719819 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.198364e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.199069e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.199069e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.021970 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.310317e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.310722e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.310722e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.600893 sec INFO: No Floating Point Exceptions have been reported - 3,561,730,996 cycles:u # 3.474 GHz (75.04%) - 1,144,548 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.04%) - 252,785,073 stalled-cycles-backend:u # 7.10% backend cycles idle (75.04%) - 9,519,651,231 instructions:u # 2.67 insn per cycle - # 0.03 stalled cycles per insn (75.04%) - 1.028867905 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84279) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe + 4,227,936,122 cycles # 2.635 GHz + 9,561,772,642 instructions # 2.26 insn per cycle + 1.604964741 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.779364e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.779891e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.779891e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.402471 sec +INFO: No Floating Point Exceptions have been reported + 3,727,801,740 cycles # 2.652 GHz + 8,485,976,731 instructions # 2.28 insn per cycle + 1.406514546 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.270173e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.270653e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.270653e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.620234 sec +INFO: No Floating Point Exceptions have been reported + 2,696,976,380 cycles # 1.661 GHz + 4,275,004,138 instructions # 1.59 insn per cycle + 1.624259831 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 454b0b49ce..bbe92dc32c 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_16:01:52 +DATE: 2024-05-15_09:02:54 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.913726e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.919161e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.919238e+01 ) sec^-1 -MeanMatrixElemValue = ( 3.297256e-04 +- 2.011325e-04 ) GeV^-6 -TOTAL : 9.545415 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.057773e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.058162e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.058394e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 
+- 9.825549e-06 ) GeV^-6 +TOTAL : 2.432670 sec INFO: No Floating Point Exceptions have been reported - 33,072,066,946 cycles:u # 3.456 GHz (75.00%) - 3,412,866 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 6,304,861 stalled-cycles-backend:u # 0.02% backend cycles idle (74.93%) - 26,167,059,493 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (74.93%) - 9.594535476 seconds time elapsed + 7,976,180,051 cycles # 2.894 GHz + 18,205,979,560 instructions # 2.28 insn per cycle + 2.812149746 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.524370e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.527476e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.527501e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 -TOTAL : 9.021026 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.207912e+03 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 9.209784e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.210050e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.010061 sec INFO: No Floating Point Exceptions have been reported - 31,286,281,881 cycles:u # 3.460 GHz (74.94%) - 3,399,984 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.94%) - 7,513,883 stalled-cycles-backend:u # 0.02% backend cycles idle (75.04%) - 24,680,671,024 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 9.067324066 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/runTest.exe + 12,545,103,701 cycles # 2.894 GHz + 29,508,105,022 instructions # 2.35 insn per cycle + 4.393055123 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406710E-003 -Relative difference = 3.516477760164775e-07 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.025495e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.025521e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.025521e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.154224 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.584447e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.584650e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.584650e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.965610 sec INFO: No Floating Point Exceptions have been reported - 18,065,149,337 cycles:u # 3.503 GHz (74.96%) - 29,887,532 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.03%) - 2,278,927,664 stalled-cycles-backend:u # 12.62% backend cycles idle (75.03%) - 55,089,059,533 instructions:u # 3.05 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 5.161109340 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:44747) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe + 18,813,969,937 cycles # 2.700 GHz + 53,926,042,619 instructions # 2.87 insn per cycle + 6.969544126 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32063) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.249808e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.249946e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.249946e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.352174 sec 
+OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.576698e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.576791e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.576791e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.353494 sec INFO: No Floating Point Exceptions have been reported - 8,233,739,349 cycles:u # 3.496 GHz (74.90%) - 1,197,145 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.87%) - 796,084,035 stalled-cycles-backend:u # 9.67% backend cycles idle (74.87%) - 27,074,923,325 instructions:u # 3.29 insn per cycle - # 0.03 stalled cycles per insn (74.92%) - 2.359279011 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:97230) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe + 9,734,561,067 cycles # 2.900 GHz + 27,089,258,573 instructions # 2.78 insn per cycle + 3.357505834 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96286) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.161793e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.162463e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.162463e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.027989 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.404360e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.404774e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.404774e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.555973 sec +INFO: No Floating Point Exceptions have been reported + 4,218,180,841 cycles # 2.705 GHz + 9,561,432,896 instructions # 2.27 insn per cycle + 1.560032274 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.834965e+02 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.835499e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.835499e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.381649 sec INFO: No Floating Point Exceptions have been reported - 3,602,949,118 cycles:u # 3.494 GHz (74.77%) - 1,967,387 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.18%) - 315,836,274 stalled-cycles-backend:u # 8.77% backend cycles idle (75.18%) - 9,526,844,066 instructions:u # 2.64 insn per cycle - # 0.03 stalled cycles per insn (75.18%) - 1.034547423 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84249) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe + 3,737,866,183 cycles # 2.699 GHz + 8,484,726,102 instructions # 2.27 insn per cycle + 1.385644376 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.375552e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.376120e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.376120e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.569284 sec +INFO: No Floating Point Exceptions have been reported + 2,699,189,606 cycles # 1.717 GHz + 4,275,945,743 instructions # 
1.58 insn per cycle + 1.573305719 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index ac9055ac64..d45d414f5f 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_16:03:07 +DATE: 2024-05-15_09:03:47 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.824649e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.828019e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.828055e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.256794e-06 +- 4.775721e-07 ) GeV^-6 -TOTAL : 4.477029 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.561692e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.562488e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.562848e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 
+- 9.824899e-06 ) GeV^-6 +TOTAL : 1.685180 sec INFO: No Floating Point Exceptions have been reported - 15,361,498,399 cycles:u # 3.414 GHz (74.93%) - 2,842,505 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.89%) - 6,326,674 stalled-cycles-backend:u # 0.04% backend cycles idle (74.93%) - 12,512,265,453 instructions:u # 0.81 insn per cycle - # 0.00 stalled cycles per insn (74.94%) - 4.525778900 seconds time elapsed + 5,673,036,605 cycles # 2.891 GHz + 10,563,415,638 instructions # 1.86 insn per cycle + 2.019219660 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.198176e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.211673e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.211860e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.930014e-02 +- 1.363569e-02 ) GeV^-6 -TOTAL : 4.713265 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.299284e+04 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.299934e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.300068e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 +TOTAL : 1.926355 sec INFO: No Floating Point Exceptions have been reported - 16,170,678,935 cycles:u # 3.415 GHz (75.00%) - 2,706,349 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.00%) - 6,618,303 stalled-cycles-backend:u # 0.04% backend cycles idle (74.97%) - 13,155,189,747 instructions:u # 0.81 insn per cycle - # 0.00 stalled cycles per insn (74.97%) - 4.757825820 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/runTest.exe + 6,389,248,766 cycles # 2.887 GHz + 13,556,303,615 instructions # 2.12 insn per cycle + 2.270243999 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 9.855155e-03 -Avg ME (F77/GPU) = 9.8696023209835834E-003 -Relative difference = 0.0014659658811639687 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 9.849636e-03 +Avg ME (F77/GPU) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.099668e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.099698e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.099698e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 4.806907 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.586714e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.586972e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.586972e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.153257 sec INFO: No Floating Point Exceptions have been reported - 16,850,062,367 cycles:u # 3.503 GHz (74.92%) - 16,747,767 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.90%) - 1,849,826,643 stalled-cycles-backend:u # 10.98% backend cycles idle (74.98%) - 51,735,478,346 instructions:u # 3.07 insn per cycle - # 0.04 stalled cycles per insn (75.05%) - 4.813514314 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:27812) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe + 17,830,313,772 cycles # 2.896 GHz + 53,588,816,239 instructions # 3.01 insn per cycle + 6.157108561 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087414119E-003 -Relative difference = 2.1196409216982896e-08 +Avg ME (F77/C++) = 9.8479612087541066E-003 +Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.616004e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.616551e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.616551e+02 ) 
sec^-1 -MeanMatrixElemValue = ( 4.924322e-03 +- 4.918776e-03 ) GeV^-6 -TOTAL : 1.149551 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.368933e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.369342e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.369342e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.572219 sec INFO: No Floating Point Exceptions have been reported - 4,011,716,086 cycles:u # 3.480 GHz (75.02%) - 355,931 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.02%) - 387,377,622 stalled-cycles-backend:u # 9.66% backend cycles idle (75.02%) - 13,707,607,644 instructions:u # 3.42 insn per cycle - # 0.03 stalled cycles per insn (75.02%) - 1.156443653 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:97762) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe + 4,576,967,383 cycles # 2.905 GHz + 13,761,803,462 instructions # 3.01 insn per cycle + 1.576296416 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 9.847957e-03 -Avg ME (F77/C++) = 9.8479574833965355E-003 -Relative difference = 4.9085971470122835e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896527003E-003 +Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.043159e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.043429e+03 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.043429e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.511560 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.773238e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.774866e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.774866e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.785027 sec INFO: No Floating Point Exceptions have been reported - 1,789,386,733 cycles:u # 3.476 GHz (74.86%) - 417,214 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.14%) - 147,150,451 stalled-cycles-backend:u # 8.22% backend cycles idle (75.14%) - 4,786,787,148 instructions:u # 2.68 insn per cycle - # 0.03 stalled cycles per insn (75.14%) - 0.518956269 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84831) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe + 2,126,166,499 cycles # 2.697 GHz + 4,816,144,945 instructions # 2.27 insn per cycle + 0.789120264 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161012351E-003 -Relative difference = 1.8588827066662492e-08 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.733735e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.735805e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.735805e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.687951 sec +INFO: No Floating Point 
Exceptions have been reported + 1,861,508,781 cycles # 2.693 GHz + 4,273,150,888 instructions # 2.30 insn per cycle + 0.691882515 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.772663e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.774875e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.774875e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 
9.826771e-06 ) GeV^-6 +TOTAL : 0.785119 sec +INFO: No Floating Point Exceptions have been reported + 1,354,689,818 cycles # 1.718 GHz + 2,158,158,197 instructions # 1.59 insn per cycle + 0.789277425 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982958280E-003 +Relative difference = 2.0044092642523172e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index b3354e371e..7278096934 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_16:27:34 +DATE: 2024-05-15_09:22:33 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.780211e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.780590e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.780590e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.935145e-03 +- 4.929588e-03 ) GeV^-6 -TOTAL : 4.530503 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.582019e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.583834e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.583834e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 +TOTAL : 1.629368 sec INFO: No Floating Point Exceptions have been reported - 15,534,265,664 cycles:u # 3.416 GHz (74.91%) - 2,924,027 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.96%) - 7,136,647 stalled-cycles-backend:u # 0.05% backend cycles idle (74.90%) - 12,595,844,127 instructions:u # 0.81 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 4.579830358 seconds time elapsed + 5,438,501,558 cycles # 2.848 GHz + 11,384,430,806 instructions # 2.09 insn per cycle + 1.966478961 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.234213e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.249504e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.249504e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.258769e+00 +- 1.256832e+00 ) GeV^-6 -TOTAL : 4.738620 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.332845e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.346794e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.346794e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 +TOTAL : 1.897225 sec INFO: No Floating Point Exceptions have been reported - 16,253,633,523 cycles:u # 3.414 GHz (74.97%) - 3,512,403 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.96%) - 48,806,959 stalled-cycles-backend:u # 0.30% backend cycles idle (74.95%) - 13,176,010,291 instructions:u # 0.81 insn per cycle - # 0.00 stalled cycles per insn (74.95%) - 4.783743963 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/runTest.exe + 6,200,851,000 cycles # 2.852 GHz + 13,608,717,924 instructions # 2.19 insn per cycle + 2.231041436 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 9.855155e-03 -Avg ME (F77/GPU) = 9.8696023209835834E-003 -Relative difference = 0.0014659658811639687 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 9.849636e-03 +Avg ME (F77/GPU) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.091365e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.091399e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.091399e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 4.842836 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.468083e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.468339e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.468339e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.240178 sec INFO: No Floating Point Exceptions have been reported - 16,852,179,806 cycles:u # 3.487 GHz (74.97%) - 17,552,300 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.98%) - 1,836,450,258 stalled-cycles-backend:u # 10.90% backend cycles idle (75.06%) - 51,702,002,628 instructions:u # 3.07 insn per cycle - # 0.04 stalled cycles per insn (75.05%) - 4.849558254 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:27812) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe + 17,844,842,659 cycles # 2.859 GHz + 53,590,816,936 instructions # 3.00 insn per cycle + 6.244182701 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087414119E-003 -Relative difference = 2.1196409216982896e-08 +Avg ME (F77/C++) = 9.8479612087541066E-003 +Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.613972e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.614519e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.614519e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924322e-03 +- 4.918776e-03 ) GeV^-6 -TOTAL : 1.150231 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.307909e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.308350e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.308350e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.601295 sec INFO: No Floating Point Exceptions have been reported - 4,027,365,469 cycles:u # 3.491 GHz (74.99%) - 911,377 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.04%) - 404,696,030 stalled-cycles-backend:u # 10.05% backend cycles idle (75.04%) - 13,704,959,643 instructions:u # 3.40 insn per cycle - # 0.03 stalled cycles per insn (75.04%) - 1.157294154 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:97762) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe + 4,582,592,604 cycles # 2.857 GHz + 13,762,912,115 instructions # 3.00 insn per cycle + 1.605688643 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 9.847957e-03 -Avg ME (F77/C++) = 9.8479574833965355E-003 -Relative difference = 4.9085971470122835e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896527003E-003 +Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.002697e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.002960e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.002960e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.532243 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.657262e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.658867e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.658867e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.798566 sec +INFO: No Floating Point Exceptions have been reported + 2,128,647,672 cycles # 2.655 GHz + 4,817,126,024 instructions # 2.26 insn per cycle + 0.802553686 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.573528e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.575580e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.575580e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.702208 sec INFO: No Floating Point Exceptions have been reported - 1,795,554,108 cycles:u # 3.409 GHz (74.73%) - 539,252 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.91%) - 147,823,568 stalled-cycles-backend:u # 8.23% backend cycles idle (75.66%) - 4,785,982,688 instructions:u # 2.67 insn per cycle - # 0.03 stalled cycles per insn (75.29%) - 0.538772971 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84831) (512y: 0) (512z: 0) -------------------------------------------------------------------------- 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe + 1,871,867,779 cycles # 2.653 GHz + 4,274,231,871 instructions # 2.28 insn per cycle + 0.706209772 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161012351E-003 -Relative difference = 1.8588827066662492e-08 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.572374e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.574488e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.574488e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.808838 sec +INFO: No Floating Point Exceptions have been reported + 1,356,213,597 cycles # 1.670 GHz + 2,159,107,759 instructions # 1.59 insn per cycle + 0.812846809 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982958280E-003 +Relative difference = 2.0044092642523172e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 73ac9e8d15..61cd36fc2c 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_16:03:55 +DATE: 2024-05-15_09:04:26 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.781126e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.784111e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.784134e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.256794e-06 +- 4.775721e-07 ) GeV^-6 -TOTAL : 4.580585 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.545861e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.546657e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.547142e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 
+- 9.824899e-06 ) GeV^-6 +TOTAL : 1.688745 sec INFO: No Floating Point Exceptions have been reported - 15,715,377,910 cycles:u # 3.414 GHz (74.89%) - 2,705,528 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.87%) - 6,427,066 stalled-cycles-backend:u # 0.04% backend cycles idle (74.98%) - 12,774,760,863 instructions:u # 0.81 insn per cycle - # 0.00 stalled cycles per insn (75.07%) - 4.628245906 seconds time elapsed + 5,685,323,668 cycles # 2.881 GHz + 12,065,201,392 instructions # 2.12 insn per cycle + 2.032136430 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.241498e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.257857e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.257952e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.930014e-02 +- 1.363569e-02 ) GeV^-6 -TOTAL : 4.709271 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.321948e+04 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.322613e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.322750e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 +TOTAL : 1.913475 sec INFO: No Floating Point Exceptions have been reported - 16,165,125,978 cycles:u # 3.418 GHz (74.97%) - 2,815,066 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.95%) - 5,618,964 stalled-cycles-backend:u # 0.03% backend cycles idle (74.97%) - 13,105,466,925 instructions:u # 0.81 insn per cycle - # 0.00 stalled cycles per insn (74.96%) - 4.755259043 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/runTest.exe + 6,335,955,396 cycles # 2.890 GHz + 13,040,721,092 instructions # 2.06 insn per cycle + 2.248864975 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 9.855155e-03 -Avg ME (F77/GPU) = 9.8696023209835834E-003 -Relative difference = 0.0014659658811639687 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 9.849636e-03 +Avg ME (F77/GPU) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.099947e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.099978e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.099978e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 4.805319 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.627174e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.627430e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.627430e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.124640 sec INFO: No Floating Point Exceptions have been reported - 16,844,145,885 cycles:u # 3.503 GHz (74.92%) - 16,245,262 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.00%) - 1,694,910,965 stalled-cycles-backend:u # 10.06% backend cycles idle (75.05%) - 51,703,247,218 instructions:u # 3.07 insn per cycle - # 0.03 stalled cycles per insn (75.05%) - 4.812318727 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:27678) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe + 17,801,480,091 cycles # 2.905 GHz + 53,579,767,361 instructions # 3.01 insn per cycle + 6.128606412 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087396841E-003 -Relative difference = 2.119623377106246e-08 +Avg ME (F77/C++) = 9.8479612087582491E-003 +Relative difference = 2.1198118933954545e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.565168e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.565735e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.565735e+02 ) 
sec^-1 -MeanMatrixElemValue = ( 4.924322e-03 +- 4.918776e-03 ) GeV^-6 -TOTAL : 1.161648 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.387094e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.387509e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.387509e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.563998 sec INFO: No Floating Point Exceptions have been reported - 4,062,894,999 cycles:u # 3.488 GHz (74.63%) - 852,067 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.65%) - 357,776,995 stalled-cycles-backend:u # 8.81% backend cycles idle (74.99%) - 13,727,657,265 instructions:u # 3.38 insn per cycle - # 0.03 stalled cycles per insn (75.28%) - 1.168660469 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:97728) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe + 4,549,922,930 cycles # 2.903 GHz + 13,755,140,352 instructions # 3.02 insn per cycle + 1.568011182 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 9.847957e-03 -Avg ME (F77/C++) = 9.8479574833965355E-003 -Relative difference = 4.9085971470122835e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896225560E-003 +Relative difference = 3.151694379513441e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.039836e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.040097e+03 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.040097e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.513060 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.714186e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.715793e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.715793e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.791730 sec INFO: No Floating Point Exceptions have been reported - 1,810,288,381 cycles:u # 3.506 GHz (74.39%) - 451,194 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.74%) - 157,116,579 stalled-cycles-backend:u # 8.68% backend cycles idle (75.21%) - 4,786,726,252 instructions:u # 2.64 insn per cycle - # 0.03 stalled cycles per insn (75.21%) - 0.520128633 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84793) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe + 2,142,665,364 cycles # 2.695 GHz + 4,818,205,802 instructions # 2.25 insn per cycle + 0.795892612 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161012351E-003 -Relative difference = 1.8588827066662492e-08 +Avg ME (F77/C++) = 9.8929728161070967E-003 +Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.551977e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.554036e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.554036e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.704224 sec +INFO: No Floating Point 
Exceptions have been reported + 1,876,007,792 cycles # 2.651 GHz + 4,275,151,801 instructions # 2.28 insn per cycle + 0.708285391 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161070967E-003 +Relative difference = 1.8588234562202478e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.544217e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.546156e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.546156e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 
+- 9.826771e-06 ) GeV^-6 +TOTAL : 0.811936 sec +INFO: No Floating Point Exceptions have been reported + 1,361,211,310 cycles # 1.670 GHz + 2,164,522,586 instructions # 1.59 insn per cycle + 0.816005153 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3487) (512y: 34) (512z:79499) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982955140E-003 +Relative difference = 2.0044060904369713e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 77978bf00a..d9b2e71fbd 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 
1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_16:04:42 +DATE: 2024-05-15_09:05:05 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.524260e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.528987e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.529052e+01 ) sec^-1 -MeanMatrixElemValue = ( 3.297256e-04 +- 2.011325e-04 ) GeV^-6 -TOTAL : 9.815536 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.690095e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.690606e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.690910e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 
+- 9.825548e-06 ) GeV^-6 +TOTAL : 2.176133 sec INFO: No Floating Point Exceptions have been reported - 34,043,824,193 cycles:u # 3.460 GHz (74.98%) - 3,636,313 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 7,086,491 stalled-cycles-backend:u # 0.02% backend cycles idle (74.98%) - 26,859,805,206 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 9.874473184 seconds time elapsed + 7,139,703,316 cycles # 2.856 GHz + 15,654,967,363 instructions # 2.19 insn per cycle + 2.555531923 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.282292e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.285215e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.285247e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 -TOTAL : 9.357565 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.111988e+04 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.112256e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.112295e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.411127 sec INFO: No Floating Point Exceptions have been reported - 32,418,132,505 cycles:u # 3.457 GHz (75.01%) - 3,562,303 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) - 7,497,796 stalled-cycles-backend:u # 0.02% backend cycles idle (75.01%) - 25,645,021,335 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.00%) - 9.405477768 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/runTest.exe + 10,670,396,905 cycles # 2.855 GHz + 22,897,427,762 instructions # 2.15 insn per cycle + 3.795230746 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656533E-003 -Relative difference = 3.138524921691728e-07 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.019602e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.019628e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.019628e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.183972 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.341488e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.341701e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.341701e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.183650 sec INFO: No Floating Point Exceptions have been reported - 18,165,747,128 cycles:u # 3.502 GHz (74.99%) - 33,524,590 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.02%) - 2,322,453,909 stalled-cycles-backend:u # 12.78% backend cycles idle (75.02%) - 55,334,612,248 instructions:u # 3.05 insn per cycle - # 0.04 stalled cycles per insn (75.02%) - 5.190865459 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:44898) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe + 19,087,208,073 cycles # 2.656 GHz + 54,154,763,894 instructions # 2.84 insn per cycle + 7.187584076 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32067) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.335635e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.335771e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.335771e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.266045 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.509804e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.509885e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.509885e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.502059 sec INFO: No Floating Point Exceptions have been reported - 7,931,224,922 cycles:u # 3.495 GHz (74.98%) - 1,871,765 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.97%) - 761,023,176 stalled-cycles-backend:u # 9.60% backend cycles idle (74.97%) - 25,815,110,620 instructions:u # 3.25 insn per cycle - # 0.03 stalled cycles per insn (74.97%) - 2.272824224 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96804) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe + 9,306,165,950 cycles # 2.655 GHz + 26,158,910,898 instructions # 2.81 insn per cycle + 3.506050842 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96007) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.390146e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.390881e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.390881e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.985055 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.456453e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.456874e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.456874e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.532803 sec +INFO: No Floating Point Exceptions have been reported + 4,070,475,260 cycles # 2.650 GHz + 9,228,220,154 instructions # 2.27 insn per cycle + 1.536759047 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.023017e+02 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.023619e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.023619e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.317421 sec INFO: No Floating Point Exceptions have been reported - 3,437,555,760 cycles:u # 3.478 GHz (74.93%) - 1,502,958 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.91%) - 308,584,077 stalled-cycles-backend:u # 8.98% backend cycles idle (74.91%) - 9,068,955,721 instructions:u # 2.64 insn per cycle - # 0.03 stalled cycles per insn (74.91%) - 0.991911378 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83820) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe + 3,502,203,854 cycles # 2.652 GHz + 8,174,654,623 instructions # 2.33 insn per cycle + 1.321520742 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.376263e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.376781e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.376781e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.569331 sec +INFO: No Floating Point Exceptions have been reported + 2,616,979,256 cycles # 1.664 GHz + 4,154,596,109 instructions # 
1.59 insn per cycle + 1.573481593 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2046) (512y: 93) (512z:78760) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index e2edcec93b..5a0d7870a2 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_16:05:58 +DATE: 2024-05-15_09:05:57 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.460765e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.465659e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.465728e+01 ) sec^-1 -MeanMatrixElemValue = ( 3.297256e-04 +- 2.011325e-04 ) GeV^-6 -TOTAL : 9.907182 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.685558e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.686059e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.686349e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 
+- 9.825548e-06 ) GeV^-6 +TOTAL : 2.179217 sec INFO: No Floating Point Exceptions have been reported - 34,383,552,437 cycles:u # 3.463 GHz (74.95%) - 3,543,559 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) - 8,052,686 stalled-cycles-backend:u # 0.02% backend cycles idle (75.00%) - 27,130,894,842 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 9.954843245 seconds time elapsed + 7,132,247,686 cycles # 2.849 GHz + 16,033,584,026 instructions # 2.25 insn per cycle + 2.559487651 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.298172e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.301396e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.301429e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 -TOTAL : 9.358072 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.107499e+04 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.107770e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.107809e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.414639 sec INFO: No Floating Point Exceptions have been reported - 32,411,383,419 cycles:u # 3.456 GHz (75.01%) - 3,392,821 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 6,644,841 stalled-cycles-backend:u # 0.02% backend cycles idle (75.00%) - 25,629,804,665 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 9.405448142 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/runTest.exe + 10,670,291,088 cycles # 2.854 GHz + 23,679,613,894 instructions # 2.22 insn per cycle + 3.796653222 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656533E-003 -Relative difference = 3.138524921691728e-07 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.022890e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.022916e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.022916e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.166851 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.443183e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.443381e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.443381e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.087077 sec INFO: No Floating Point Exceptions have been reported - 18,110,860,966 cycles:u # 3.503 GHz (74.98%) - 29,566,243 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.94%) - 2,252,660,563 stalled-cycles-backend:u # 12.44% backend cycles idle (74.93%) - 55,408,521,673 instructions:u # 3.06 insn per cycle - # 0.04 stalled cycles per insn (74.93%) - 5.173774467 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:44806) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe + 19,147,646,492 cycles # 2.701 GHz + 54,154,361,807 instructions # 2.83 insn per cycle + 7.091178221 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32244) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.352806e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.352944e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.352944e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.250388 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.486367e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.486451e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.486451e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.556604 sec INFO: No Floating Point Exceptions have been reported - 7,882,617,366 cycles:u # 3.498 GHz (74.80%) - 2,227,961 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.85%) - 819,605,329 stalled-cycles-backend:u # 10.40% backend cycles idle (75.03%) - 25,767,506,663 instructions:u # 3.27 insn per cycle - # 0.03 stalled cycles per insn (75.15%) - 2.257234001 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96765) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe + 9,388,405,137 cycles # 2.638 GHz + 26,078,220,173 instructions # 2.78 insn per cycle + 3.560671471 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:95901) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.481441e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.482179e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.482179e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.968592 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.497483e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.497914e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497914e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.514756 sec +INFO: No Floating Point Exceptions have been reported + 4,017,002,412 cycles # 2.646 GHz + 9,213,710,845 instructions # 2.29 insn per cycle + 1.518740707 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.994066e+02 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.994690e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.994690e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.326970 sec INFO: No Floating Point Exceptions have been reported - 3,391,090,231 cycles:u # 3.489 GHz (74.49%) - 1,313,193 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.74%) - 318,676,750 stalled-cycles-backend:u # 9.40% backend cycles idle (75.15%) - 9,050,899,584 instructions:u # 2.67 insn per cycle - # 0.04 stalled cycles per insn (75.31%) - 0.975333813 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83378) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe + 3,524,449,301 cycles # 2.649 GHz + 8,167,512,543 instructions # 2.32 insn per cycle + 1.330907157 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.358102e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.358609e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.358609e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.577322 sec +INFO: No Floating Point Exceptions have been reported + 2,627,871,884 cycles # 1.663 GHz + 4,154,099,949 instructions # 
1.58 insn per cycle + 1.581349880 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1493) (512y: 175) (512z:78776) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 8b0482f7d4..bddb87b89c 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_16:00:16 +DATE: 2024-05-15_09:00:42 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe: Segmentation fault - 708,386,539 cycles:u # 0.721 GHz (73.83%) - 2,301,186 stalled-cycles-frontend:u # 0.32% frontend cycles idle (74.20%) - 5,237,056 stalled-cycles-backend:u # 0.74% backend cycles idle (75.42%) - 1,246,636,487 instructions:u # 1.76 insn per cycle - # 0.00 stalled cycles per insn (75.74%) - 1.012706895 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.909030e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.254339e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 5.559590e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.451814 sec +INFO: No Floating Point Exceptions have been reported + 1,901,362,158 cycles # 2.851 GHz + 2,671,473,853 instructions # 1.41 insn per cycle + 0.724019133 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.666389e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.186579e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.531944e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.532226 sec +INFO: No Floating Point Exceptions have been reported + 2,173,433,084 cycles # 2.828 GHz + 3,153,637,703 instructions # 1.45 insn per cycle + 0.825328005 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.011847e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033729e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033729e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.642681 sec +INFO: No Floating Point Exceptions have been reported + 4,720,170,815 cycles # 2.868 GHz + 13,462,481,531 instructions # 2.85 insn per cycle + 1.646920268 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499484 +Relative difference = 5.286896509487005e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.850569e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.921982e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.921982e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.907708 sec +INFO: No Floating Point Exceptions have been reported + 2,621,073,603 cycles # 2.876 GHz + 7,551,555,914 instructions # 2.88 insn per cycle + 0.911980507 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499478 +Relative difference = 5.28689651338321e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe: Segmentation fault - 904,269,601 cycles:u # 2.111 GHz (75.74%) - 2,203,814 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.74%) - 5,510,229 stalled-cycles-backend:u # 0.61% backend cycles idle (74.91%) - 1,374,598,901 instructions:u # 1.52 insn per cycle - # 0.00 stalled cycles per insn (74.94%) - 0.454069271 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest.exe -Memory access fault by GPU node-4 (Agent handle: 0x6025e0) on address 0x14609c00a000. Reason: Unknown. 
+Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.026606e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.230232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.230232e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.564761 sec +INFO: No Floating Point Exceptions have been reported + 1,486,098,299 cycles # 2.614 GHz + 3,119,399,678 instructions # 2.10 insn per cycle + 0.569234281 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.466118e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.717374e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.717374e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.495756 sec +INFO: No Floating Point Exceptions have been reported + 1,343,361,512 cycles # 2.691 GHz + 2,981,714,602 instructions # 2.22 insn per cycle + 0.500153355 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.236724e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347469e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347469e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.757688 sec +INFO: No Floating Point Exceptions have been reported + 1,336,802,616 cycles # 1.756 GHz + 1,953,160,303 instructions # 1.46 insn per cycle + 0.761809117 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index a8089ff9c1..1e67b43177 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,78 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_16:25:24 +DATE: 2024-05-15_09:20:13 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe: Segmentation fault - 768,592,038 cycles:u # 2.241 GHz (74.56%) - 2,610,445 stalled-cycles-frontend:u # 0.34% frontend cycles idle (75.01%) - 30,452,677 stalled-cycles-backend:u # 3.96% backend cycles idle (76.43%) - 1,205,736,295 instructions:u # 1.57 insn per cycle - # 0.03 stalled cycles per insn (76.56%) - 0.371032601 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.448925e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.124463e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.124463e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.483255 sec +INFO: No Floating Point Exceptions have been reported + 1,956,343,524 cycles # 2.810 GHz + 2,913,177,590 instructions # 1.49 insn per cycle + 0.753489640 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe: Segmentation fault - 2,992,221,292 cycles:u # 2.788 GHz (75.58%) - 16,372,327 stalled-cycles-frontend:u # 0.55% frontend cycles idle (75.41%) - 841,167,524 stalled-cycles-backend:u # 28.11% backend cycles idle (75.41%) - 3,204,323,336 instructions:u # 1.07 insn per cycle - # 0.26 stalled cycles per insn (74.98%) - 1.094948928 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest.exe -Memory access fault by GPU node-4 (Agent handle: 0x6025e0) on address 0x147dd511a000. Reason: Unknown. 
+Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.145662e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.331613e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.331613e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.764560 sec +INFO: No Floating Point Exceptions have been reported + 2,875,174,546 cycles # 2.829 GHz + 4,447,931,202 instructions # 1.55 insn per cycle + 1.073875112 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.006409e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.028326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.028326e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.659684 sec +INFO: No Floating Point Exceptions have been reported + 4,757,299,192 cycles # 2.860 GHz + 13,469,690,210 instructions # 2.83 insn per cycle + 1.664301921 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499484 +Relative difference = 5.286896509487005e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.826759e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.897996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.897996e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.926560 sec +INFO: No Floating Point Exceptions have been reported + 2,658,383,528 cycles # 2.857 GHz + 7,600,215,451 instructions # 2.86 insn per cycle + 0.930908981 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499478 +Relative difference = 5.28689651338321e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.079827e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.284873e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.284873e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.562992 sec +INFO: No Floating Point Exceptions have been reported + 1,517,343,501 cycles # 2.677 GHz + 3,170,040,097 instructions # 2.09 insn per cycle + 0.567535406 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.421067e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.671215e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.671215e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.509037 sec +INFO: No Floating Point Exceptions have been reported + 1,373,998,265 cycles # 2.680 GHz + 3,030,756,412 instructions # 2.21 insn per cycle + 0.513395811 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.223104e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.330253e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.330253e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.769822 sec +INFO: No Floating Point Exceptions have been reported + 1,369,271,721 cycles # 1.771 GHz + 1,991,169,855 instructions # 1.45 insn per cycle + 0.774195853 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 0ab4ed0959..4aab0bf5ea 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_16:00:20 +DATE: 2024-05-15_09:00:56 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/gcheck.exe: Segmentation fault - 759,819,625 cycles:u # 2.319 GHz (70.92%) - 2,381,645 stalled-cycles-frontend:u # 0.31% frontend cycles idle (73.81%) - 5,666,350 stalled-cycles-backend:u # 0.75% backend cycles idle (76.18%) - 1,226,165,010 instructions:u # 1.61 insn per cycle - # 0.00 stalled cycles per insn (75.61%) - 0.353045104 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.898149e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.161627e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 5.461883e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.453219 sec +INFO: No Floating Point Exceptions have been reported + 1,899,143,143 cycles # 2.817 GHz + 2,687,151,052 instructions # 1.41 insn per cycle + 0.730634084 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.646029e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.108157e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.446695e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.533523 sec +INFO: No Floating Point Exceptions have been reported + 2,165,769,421 cycles # 2.820 GHz + 3,118,817,395 instructions # 1.44 insn per cycle + 0.826557976 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/gcheck.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.012188e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033722e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.642236 sec +INFO: No Floating Point Exceptions have been reported + 4,733,263,322 cycles # 2.876 GHz + 13,456,928,369 instructions # 2.84 insn per cycle + 1.646542126 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499484 +Relative difference = 5.286896509487005e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.865658e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.935819e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935819e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.900438 sec +INFO: No Floating Point Exceptions have been reported + 2,628,911,017 cycles # 2.909 GHz + 7,551,755,138 instructions # 2.87 insn per cycle + 0.904636036 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499478 +Relative difference = 5.28689651338321e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/gcheck.exe: Segmentation fault - 888,295,777 cycles:u # 2.066 GHz (75.99%) - 2,126,930 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.76%) - 5,545,150 stalled-cycles-backend:u # 0.62% backend cycles idle (75.88%) - 1,381,325,489 instructions:u # 1.56 insn per cycle - # 0.00 stalled cycles per insn (75.70%) - 0.454497732 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/runTest.exe -Memory access fault by GPU node-4 (Agent handle: 0x601530) on address 0x14af98559000. Reason: Unknown. 
+Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.162085e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.369795e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.369795e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.541035 sec +INFO: No Floating Point Exceptions have been reported + 1,478,892,065 cycles # 2.715 GHz + 3,118,063,669 instructions # 2.11 insn per cycle + 0.545344535 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.521390e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.773050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.773050e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.487980 sec +INFO: No Floating Point Exceptions have been reported + 1,338,590,636 cycles # 2.723 GHz + 2,978,713,537 instructions # 2.23 insn per cycle + 0.492213180 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.305225e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.413141e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.413141e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.735631 sec +INFO: No Floating Point Exceptions have been reported + 1,327,963,112 cycles # 1.797 GHz + 1,951,639,234 instructions # 1.47 insn per cycle + 0.739806638 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 51deeeaea3..c1da5435c4 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_16:00:24 +DATE: 2024-05-15_09:01:09 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe: Segmentation fault - 704,059,057 cycles:u # 2.157 GHz (76.09%) - 2,115,916 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.51%) - 5,481,551 stalled-cycles-backend:u # 0.78% backend cycles idle (75.73%) - 1,161,996,833 instructions:u # 1.65 insn per cycle - # 0.00 stalled cycles per insn (77.74%) - 0.353298917 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.475418e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.211242e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 1.342424e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.444704 sec +INFO: No Floating Point Exceptions have been reported + 1,889,829,448 cycles # 2.849 GHz + 2,675,641,307 instructions # 1.42 insn per cycle + 0.720116124 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.067311e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.829054e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.961375e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 +TOTAL : 0.479911 sec +INFO: No Floating Point Exceptions have been reported + 2,018,885,875 cycles # 2.854 GHz + 2,869,563,274 instructions # 1.42 insn per cycle + 0.764758987 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.086530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.111191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111191e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.529427 sec +INFO: No Floating Point Exceptions have been reported + 4,457,289,207 cycles # 2.908 GHz + 13,047,683,288 instructions # 2.93 insn per cycle + 1.533439591 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246857540270419 +Relative difference = 1.7265064590569047e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.919635e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.106235e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.106235e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.582181 sec +INFO: No Floating Point Exceptions have been reported + 1,698,976,531 cycles # 2.901 GHz + 4,511,883,636 instructions # 2.66 insn per cycle + 0.586192712 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246859631675157 +Relative difference = 2.5853054135974944e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe: Segmentation fault - 798,428,609 cycles:u # 2.151 GHz (74.43%) - 2,320,340 stalled-cycles-frontend:u # 0.29% frontend cycles idle (72.28%) - 5,688,890 stalled-cycles-backend:u # 0.71% backend cycles idle (74.66%) - 1,253,832,398 instructions:u # 1.57 insn per cycle - # 0.00 stalled cycles per insn (76.66%) - 0.393426066 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest.exe -Memory access fault by GPU node-4 (Agent handle: 0x5ee5e0) on address 0x14c67eab5000. Reason: Unknown. 
+Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.693981e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.403408e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.403408e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.308817 sec +INFO: No Floating Point Exceptions have been reported + 850,392,085 cycles # 2.723 GHz + 1,895,931,529 instructions # 2.23 insn per cycle + 0.312838116 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.858267e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.647572e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.647572e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.301291 sec +INFO: No Floating Point Exceptions have been reported + 806,075,929 cycles # 2.644 GHz + 1,819,584,061 instructions # 2.26 insn per cycle + 0.305428041 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.437479e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.882177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.882177e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.392579 sec +INFO: No Floating Point Exceptions have been reported + 734,620,005 cycles # 1.854 GHz + 1,304,440,768 instructions # 1.78 insn per cycle + 0.396763024 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489383243206 +Relative difference = 4.32888033512879e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index a126c1faff..99a9bd4312 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,78 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_16:25:29 +DATE: 2024-05-15_09:20:27 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe: Segmentation fault - 729,145,754 cycles:u # 2.125 GHz (76.71%) - 2,413,344 stalled-cycles-frontend:u # 0.33% frontend cycles idle (72.78%) - 41,308,952 stalled-cycles-backend:u # 5.67% backend cycles idle (70.29%) - 1,255,558,408 instructions:u # 1.72 insn per cycle - # 0.03 stalled cycles per insn (74.72%) - 0.364988449 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.331329e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.996965e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.996965e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 +TOTAL : 0.463597 sec +INFO: No Floating Point Exceptions have been reported + 1,898,524,563 cycles # 2.813 GHz + 2,749,841,290 instructions # 1.45 insn per cycle + 0.733163352 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe: Segmentation fault - 2,823,378,972 cycles:u # 2.845 GHz (75.74%) - 16,838,298 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.91%) - 860,485,057 stalled-cycles-backend:u # 30.48% backend cycles idle (74.04%) - 3,205,772,176 instructions:u # 1.14 insn per cycle - # 0.27 stalled cycles per insn (74.68%) - 1.015525494 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest.exe -Memory access fault by GPU node-4 (Agent handle: 0x5ee5e0) on address 0x150497215000. Reason: Unknown. 
+Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.814076e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.499907e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.499907e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 +TOTAL : 0.636307 sec +INFO: No Floating Point Exceptions have been reported + 2,461,375,134 cycles # 2.827 GHz + 3,720,831,531 instructions # 1.51 insn per cycle + 0.927666351 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.067015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.091473e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.091473e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.560902 sec +INFO: No Floating Point Exceptions have been reported + 4,476,631,631 cycles # 2.861 GHz + 13,052,576,283 instructions # 2.92 insn per cycle + 1.565284133 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246857540270419 +Relative difference = 1.7265064590569047e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.849727e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.036356e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.036356e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.601117 sec +INFO: No Floating Point Exceptions have been reported + 1,725,235,767 cycles # 2.853 GHz + 4,560,189,479 instructions # 2.64 insn per cycle + 0.605343025 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246859631675157 +Relative difference = 2.5853054135974944e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.549970e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.241462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.241462e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.320675 sec +INFO: No Floating Point Exceptions have been reported + 870,683,283 cycles # 2.684 GHz + 1,932,963,570 instructions # 2.22 insn per cycle + 0.324927313 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.928578e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.731935e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.731935e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.302147 sec +INFO: No Floating Point Exceptions have been reported + 823,010,972 cycles # 2.692 GHz + 1,856,018,050 instructions # 2.26 insn per cycle + 0.306433740 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.308589e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.721286e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.721286e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.408402 sec +INFO: No Floating Point Exceptions have been reported + 755,565,484 cycles # 1.834 GHz + 1,346,064,637 instructions # 1.78 insn per cycle + 0.412615974 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489383243206 +Relative difference = 4.32888033512879e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 04f6ef75ef..ee7754c1e5 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_16:00:27 +DATE: 2024-05-15_09:01:21 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/gcheck.exe: Segmentation fault - 722,389,001 cycles:u # 2.213 GHz (73.26%) - 2,066,765 stalled-cycles-frontend:u # 0.29% frontend cycles idle (76.17%) - 5,958,775 stalled-cycles-backend:u # 0.82% backend cycles idle (75.52%) - 1,198,904,032 instructions:u # 1.66 insn per cycle - # 0.00 stalled cycles per insn (75.64%) - 0.353053631 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.619671e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.171347e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 1.321938e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.449334 sec +INFO: No Floating Point Exceptions have been reported + 1,911,458,979 cycles # 2.837 GHz + 2,678,993,367 instructions # 1.40 insn per cycle + 0.731911481 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.999796e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.792626e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907811e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 +TOTAL : 0.480185 sec +INFO: No Floating Point Exceptions have been reported + 2,019,442,505 cycles # 2.856 GHz + 2,899,181,374 instructions # 1.44 insn per cycle + 0.764032904 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/gcheck.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.071947e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.096218e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.096218e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.550896 sec +INFO: No Floating Point Exceptions have been reported + 4,453,962,103 cycles # 2.867 GHz + 13,028,729,940 instructions # 2.93 insn per cycle + 1.555070395 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246857540270419 +Relative difference = 1.7265064590569047e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.922555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.112075e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.112075e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.581270 sec +INFO: No Floating Point Exceptions have been reported + 1,695,848,596 cycles # 2.898 GHz + 4,508,366,642 instructions # 2.66 insn per cycle + 0.585397485 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3588) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246859631675157 +Relative difference = 2.5853054135974944e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/gcheck.exe: Segmentation fault - 800,574,835 cycles:u # 2.111 GHz (75.14%) - 2,111,514 stalled-cycles-frontend:u # 0.26% frontend cycles idle (77.40%) - 4,989,204 stalled-cycles-backend:u # 0.62% backend cycles idle (75.40%) - 1,371,821,186 instructions:u # 1.71 insn per cycle - # 0.00 stalled cycles per insn (72.18%) - 0.402319566 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/runTest.exe -Memory access fault by GPU node-4 (Agent handle: 0x5ed530) on address 0x146a8b46c000. Reason: Unknown. 
+Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.713904e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.428103e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.428103e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.307920 sec +INFO: No Floating Point Exceptions have been reported + 851,602,918 cycles # 2.734 GHz + 1,892,990,089 instructions # 2.22 insn per cycle + 0.312008999 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.076914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.899685e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.899685e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.290714 sec +INFO: No Floating Point Exceptions have been reported + 799,236,939 cycles # 2.715 GHz + 1,815,219,617 instructions # 2.27 insn per cycle + 0.294888319 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.454618e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.885329e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.885329e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.390931 sec +INFO: No Floating Point Exceptions have been reported + 736,041,895 cycles # 1.866 GHz + 1,302,074,830 instructions # 1.77 insn per cycle + 0.395025185 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1936) (512y: 32) (512z: 2382) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489383243206 +Relative difference = 4.32888033512879e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 49a0535746..1fa62c82ef 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_16:00:30 +DATE: 2024-05-15_09:01:33 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/gcheck.exe: Segmentation fault - 750,335,748 cycles:u # 2.276 GHz (70.51%) - 2,089,979 stalled-cycles-frontend:u # 0.28% frontend cycles idle (73.95%) - 5,698,519 stalled-cycles-backend:u # 0.76% backend cycles idle (76.35%) - 1,212,281,398 instructions:u # 1.62 insn per cycle - # 0.00 stalled cycles per insn (75.76%) - 0.353482563 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.864147e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.385266e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 5.725274e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.450470 sec +INFO: No Floating Point Exceptions have been reported + 1,909,070,340 cycles # 2.862 GHz + 2,709,311,513 instructions # 1.42 insn per cycle + 0.723332111 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.665623e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.221672e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.571452e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.527783 sec +INFO: No Floating Point Exceptions have been reported + 2,213,027,924 cycles # 2.867 GHz + 3,201,104,338 instructions # 1.45 insn per cycle + 0.828647636 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/gcheck.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.016159e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.037746e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.037746e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.635594 sec +INFO: No Floating Point Exceptions have been reported + 4,753,331,270 cycles # 2.901 GHz + 13,466,539,270 instructions # 2.83 insn per cycle + 1.639815517 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.881999e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.954937e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.954937e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.893130 sec +INFO: No Floating Point Exceptions have been reported + 2,606,846,984 cycles # 2.907 GHz + 7,384,622,562 instructions # 2.83 insn per cycle + 0.897377244 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/gcheck.exe: Segmentation fault - 926,771,584 cycles:u # 2.147 GHz (75.14%) - 2,293,476 stalled-cycles-frontend:u # 0.25% frontend cycles idle (72.10%) - 5,492,410 stalled-cycles-backend:u # 0.59% backend cycles idle (73.93%) - 1,353,222,126 instructions:u # 1.46 insn per cycle - # 0.00 stalled cycles per insn (76.26%) - 0.453968445 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/runTest.exe -Memory access fault by GPU node-4 (Agent handle: 0x6025e0) on address 0x1537cffba000. Reason: Unknown. 
+Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.203164e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.414319e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.414319e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.534464 sec +INFO: No Floating Point Exceptions have been reported + 1,468,689,876 cycles # 2.729 GHz + 3,055,369,851 instructions # 2.08 insn per cycle + 0.538802998 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.611648e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.879569e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.879569e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.476482 sec +INFO: No Floating Point Exceptions have been reported + 1,311,176,707 cycles # 2.731 GHz + 2,929,976,202 instructions # 2.23 insn per cycle + 0.480656090 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.242556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.345674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.345674e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.755974 sec +INFO: No Floating Point Exceptions have been reported + 1,368,037,439 cycles # 1.801 GHz + 1,969,314,734 instructions # 1.44 insn per cycle + 0.760192026 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index f9c8bb0940..08bb5f814d 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_16:00:34 +DATE: 2024-05-15_09:01:47 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/gcheck.exe: Segmentation fault - 694,936,346 cycles:u # 2.113 GHz (75.86%) - 2,256,716 stalled-cycles-frontend:u # 0.32% frontend cycles idle (73.76%) - 5,727,296 stalled-cycles-backend:u # 0.82% backend cycles idle (71.33%) - 1,273,451,792 instructions:u # 1.83 insn per cycle - # 0.00 stalled cycles per insn (73.85%) - 0.356041379 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.786922e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.093990e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 5.397156e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.451157 sec +INFO: No Floating Point Exceptions have been reported + 1,903,287,247 cycles # 2.854 GHz + 2,701,712,617 instructions # 1.42 insn per cycle + 0.723485810 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.631468e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.076387e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.412421e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.533478 sec +INFO: No Floating Point Exceptions have been reported + 2,199,085,341 cycles # 2.859 GHz + 3,170,238,292 instructions # 1.44 insn per cycle + 0.827674110 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/gcheck.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.015955e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.037432e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.037432e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.635549 sec +INFO: No Floating Point Exceptions have been reported + 4,738,698,726 cycles # 2.891 GHz + 13,451,375,314 instructions # 2.84 insn per cycle + 1.639835424 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.861825e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.933431e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.933431e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.902515 sec +INFO: No Floating Point Exceptions have been reported + 2,609,184,588 cycles # 2.879 GHz + 7,388,302,466 instructions # 2.83 insn per cycle + 0.906730854 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/gcheck.exe: Segmentation fault - 890,197,611 cycles:u # 2.060 GHz (76.21%) - 2,053,177 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.95%) - 5,438,472 stalled-cycles-backend:u # 0.61% backend cycles idle (76.00%) - 1,324,555,031 instructions:u # 1.49 insn per cycle - # 0.00 stalled cycles per insn (75.78%) - 0.454474968 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/runTest.exe -Memory access fault by GPU node-4 (Agent handle: 0x601530) on address 0x14e1e6159000. Reason: Unknown. 
+Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.186143e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.397015e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397015e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.536785 sec +INFO: No Floating Point Exceptions have been reported + 1,467,516,245 cycles # 2.716 GHz + 3,055,002,540 instructions # 2.08 insn per cycle + 0.540922505 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.616442e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.883140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.883140e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.475861 sec +INFO: No Floating Point Exceptions have been reported + 1,308,292,499 cycles # 2.728 GHz + 2,930,815,465 instructions # 2.24 insn per cycle + 0.480084547 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.255482e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.357706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.357706e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.751345 sec +INFO: No Floating Point Exceptions have been reported + 1,365,251,936 cycles # 1.808 GHz + 1,969,248,646 instructions # 1.44 insn per cycle + 0.755562444 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index ae6d2951a4..2de5598db8 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_16:44:17 +DATE: 2024-05-15_09:38:14 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.603796e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.458436e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.502036e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.383101 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.605271e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.089863e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185959e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 
1.609110e-01 ) GeV^0 +TOTAL : 0.529511 sec INFO: No Floating Point Exceptions have been reported - 939,106,630 cycles:u # 2.343 GHz (74.25%) - 2,223,440 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.30%) - 5,823,072 stalled-cycles-backend:u # 0.62% backend cycles idle (74.65%) - 1,484,202,719 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (75.99%) - 0.435959895 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/runTest.exe + 2,190,461,080 cycles # 2.819 GHz + 3,136,910,301 instructions # 1.43 insn per cycle + 0.833966524 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110271 -Relative difference = 2.1036162350152416e-07 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP 
threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.328488e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.384816e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.384816e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.686474 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.866601e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.916510e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.916510e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 5.729391 sec INFO: No Floating Point Exceptions have been reported - 16,124,595,636 cycles:u # 3.433 GHz (74.93%) - 9,541,345 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.97%) - 3,653,068,162 stalled-cycles-backend:u # 22.66% backend cycles idle (74.97%) - 41,670,912,640 instructions:u # 2.58 insn per cycle - # 0.09 stalled cycles per insn (74.97%) - 4.701938773 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest.exe + 16,430,554,735 cycles # 2.866 GHz + 42,487,606,064 instructions # 2.59 insn per cycle + 5.734791443 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 711) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.051872e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.236166e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.236166e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.774123 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.252768e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.421849e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.421849e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.335881 sec INFO: No Floating Point Exceptions have been reported - 9,335,501,881 cycles:u # 3.349 GHz (75.04%) - 8,987,352 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.04%) - 1,824,361,010 stalled-cycles-backend:u # 19.54% backend cycles idle (75.03%) - 26,239,572,430 instructions:u # 2.81 insn per cycle - # 0.07 stalled cycles per insn (75.04%) - 3.094497404 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2294) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest.exe + 9,601,506,382 cycles # 2.874 GHz + 26,317,441,050 instructions # 2.74 insn per cycle + 3.341476773 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2388) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.359529e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.906295e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.906295e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.611971 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.343541e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.785141e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.785141e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.074426 sec +INFO: No Floating Point Exceptions have been reported + 5,659,942,551 cycles # 2.723 GHz + 12,027,441,307 instructions # 2.13 insn per cycle + 2.080055501 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2532) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.899321e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
6.433990e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.433990e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 1.887685 sec INFO: No Floating Point Exceptions have been reported - 5,295,374,231 cycles:u # 3.258 GHz (74.95%) - 7,540,695 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.90%) - 487,371,265 stalled-cycles-backend:u # 9.20% backend cycles idle (74.92%) - 12,305,551,328 instructions:u # 2.32 insn per cycle - # 0.04 stalled cycles per insn (74.92%) - 1.629855341 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2548) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest.exe + 5,183,911,111 cycles # 2.739 GHz + 11,155,246,112 instructions # 2.15 insn per cycle + 1.893248416 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2195) (512y: 148) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.585962e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.778956e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.778956e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.035606 sec +INFO: No Floating Point Exceptions have been reported + 5,553,509,524 cycles # 1.827 GHz + 8,071,821,721 instructions # 1.45 insn 
per cycle + 3.041168439 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1471) (512y: 129) (512z: 1684) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index bbcfb1ad25..0e03f4fe66 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_16:44:31 +DATE: 2024-05-15_09:38:39 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.527768e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.394131e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.433838e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.402263 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.605793e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092662e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188361e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 
1.609110e-01 ) GeV^0 +TOTAL : 0.527275 sec INFO: No Floating Point Exceptions have been reported - 932,334,891 cycles:u # 2.280 GHz (74.47%) - 2,201,257 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.41%) - 5,652,546 stalled-cycles-backend:u # 0.61% backend cycles idle (75.14%) - 1,478,530,745 instructions:u # 1.59 insn per cycle - # 0.00 stalled cycles per insn (75.10%) - 0.458497357 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/runTest.exe + 2,209,654,097 cycles # 2.866 GHz + 3,153,659,564 instructions # 1.43 insn per cycle + 0.829664465 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110271 -Relative difference = 2.1036162350152416e-07 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP 
threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.337798e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.394950e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.394950e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.674231 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.917932e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.970345e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970345e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 5.578442 sec INFO: No Floating Point Exceptions have been reported - 15,926,402,288 cycles:u # 3.398 GHz (74.95%) - 9,876,431 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.92%) - 35,516,406 stalled-cycles-backend:u # 0.22% backend cycles idle (74.91%) - 42,467,880,880 instructions:u # 2.67 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 4.691836216 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest.exe + 16,270,300,978 cycles # 2.915 GHz + 43,269,542,953 instructions # 2.66 insn per cycle + 5.583862261 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 662) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.273752e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.480636e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.480636e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.640756 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.295405e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.467767e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.467767e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.294249 sec INFO: No Floating Point Exceptions have been reported - 8,858,395,530 cycles:u # 3.338 GHz (75.01%) - 9,289,255 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.98%) - 674,552,327 stalled-cycles-backend:u # 7.61% backend cycles idle (74.98%) - 25,064,616,066 instructions:u # 2.83 insn per cycle - # 0.03 stalled cycles per insn (74.99%) - 2.658338391 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2130) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest.exe + 9,439,488,737 cycles # 2.862 GHz + 25,429,298,825 instructions # 2.69 insn per cycle + 3.299556098 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2268) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.611385e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.050599e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.050599e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.772426 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.717381e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.065402e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.065402e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.334499 sec +INFO: No Floating Point Exceptions have been reported + 6,268,270,664 cycles # 2.680 GHz + 13,639,651,270 instructions # 2.18 insn per cycle + 2.339998334 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2629) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.896625e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
5.269348e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.269348e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.253497 sec INFO: No Floating Point Exceptions have been reported - 5,851,359,243 cycles:u # 3.277 GHz (74.95%) - 9,366,635 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.91%) - 1,273,367,798 stalled-cycles-backend:u # 21.76% backend cycles idle (74.92%) - 13,603,531,303 instructions:u # 2.32 insn per cycle - # 0.09 stalled cycles per insn (74.93%) - 1.789679630 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2610) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest.exe + 6,047,001,238 cycles # 2.678 GHz + 12,723,590,718 instructions # 2.10 insn per cycle + 2.258908112 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 296) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.435146e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.613567e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.613567e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.164665 sec +INFO: No Floating Point Exceptions have been reported + 5,622,890,350 cycles # 1.774 GHz + 8,925,668,868 instructions # 1.59 insn 
per cycle + 3.170077466 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1357) (512y: 171) (512z: 1777) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index fbecf1d626..6bdff93d6b 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,181 +1,228 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_16:44:45 +DATE: 2024-05-15_09:39:04 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.357473e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.206105e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.296255e+08 ) sec^-1 -MeanMatrixElemValue = ( 6.834692e+00 +- 1.463624e-01 ) GeV^0 -TOTAL : 0.325039 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.598682e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.496542e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.761806e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 
1.620281e-01 ) GeV^0 +TOTAL : 0.485583 sec INFO: No Floating Point Exceptions have been reported - 766,423,261 cycles:u # 2.194 GHz (73.78%) - 2,101,254 stalled-cycles-frontend:u # 0.27% frontend cycles idle (73.18%) - 5,549,032 stalled-cycles-backend:u # 0.72% backend cycles idle (74.86%) - 1,246,921,500 instructions:u # 1.63 insn per cycle - # 0.00 stalled cycles per insn (75.94%) - 0.378180737 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/runTest.exe + 2,004,894,430 cycles # 2.817 GHz + 2,895,867,287 instructions # 1.44 insn per cycle + 0.768790632 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 4.313524e+00 -Avg ME (F77/GPU) = 4.3135525460820645 -Relative difference = 6.617809954082434e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695463908836 +Relative difference = 4.162439020000051e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.691865e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.764922e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.764922e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 4.045298 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.971680e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.029242e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.029242e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 5.407539 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 13,816,308,539 cycles:u # 3.408 GHz (74.96%) - 17,510,892 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.94%) - 1,202,254,806 stalled-cycles-backend:u # 8.70% backend cycles idle (74.95%) - 40,754,489,444 instructions:u # 2.95 insn per cycle - # 0.03 stalled cycles per insn (74.95%) - 4.058642606 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 598) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest.exe + 15,732,291,805 cycles # 2.908 GHz + 42,223,773,670 instructions # 2.68 insn per cycle + 5.413095010 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 601) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135737644042820 -Relative difference = 5.461728906135488e-08 +Avg ME (F77/C++) = 4.3135739049175754 +Relative difference = 2.2042608890083832e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 
-EvtsPerSec[Rmb+ME] (23) = ( 5.928754e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.311008e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.311008e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 1.917449 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.576683e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.925395e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.925395e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.380795 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,451,480,565 cycles:u # 3.350 GHz (75.04%) - 12,106,927 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.08%) - 2,462,093,126 stalled-cycles-backend:u # 38.16% backend cycles idle (75.08%) - 16,252,754,111 instructions:u # 2.52 insn per cycle - # 0.15 stalled cycles per insn (75.08%) - 1.930581694 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2735) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest.exe + 6,946,843,290 cycles # 2.912 GHz + 16,919,386,922 instructions # 2.44 insn per cycle + 2.386268346 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2983) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313573e+00 -Avg ME (F77/C++) = 4.3135733148083091 -Relative difference = 7.298086973342306e-08 +Avg ME (C++/C++) = 4.313572e+00 +Avg ME (F77/C++) = 4.3135722205042839 +Relative difference = 5.111872113533787e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc 
--all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.154310e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.294552e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.294552e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289197e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.057858 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.963539e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.006176e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.006176e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.404224 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,495,103,075 cycles:u # 3.277 GHz (74.56%) - 8,963,767 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.80%) - 1,195,464,933 stalled-cycles-backend:u # 34.20% backend cycles idle (75.17%) - 8,024,650,910 instructions:u # 2.30 insn per cycle - # 0.15 stalled cycles per insn (75.25%) - 1.070740395 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3309) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest.exe + 3,861,367,030 cycles # 2.741 GHz + 7,989,911,144 instructions # 2.07 insn per cycle + 1.409406388 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135650876211002 -Relative difference = 2.03129199623388e-08 +Avg ME (F77/C++) = 4.3135645699221641 +Relative difference = 9.97035713074993e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.421817e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.587592e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 9.587592e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.332462 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,663,720,767 cycles # 2.741 GHz + 7,492,536,355 instructions # 2.05 insn per cycle + 1.337531859 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3036) (512y: 23) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645699221641 +Relative difference = 9.97035713074993e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal 
loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.252793e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.863234e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.863234e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.765566 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,314,814,363 cycles # 1.873 GHz + 5,987,731,552 instructions # 1.81 insn per cycle + 1.770605498 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2418) (512y: 32) (512z: 2031) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313564e+00 +Avg ME (F77/C++) = 4.3135643783025444 +Relative difference = 8.770069111236825e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 61935ba277..7ffa995122 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,181 +1,228 @@ -Building 
in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_16:44:57 +DATE: 2024-05-15_09:39:24 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = 
SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.877469e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.434218e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.544198e+08 ) sec^-1 -MeanMatrixElemValue = ( 6.834692e+00 +- 1.463624e-01 ) GeV^0 -TOTAL : 0.328664 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.587420e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.516829e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.786326e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.483303 sec INFO: No Floating Point Exceptions have been reported - 781,773,953 cycles:u # 2.213 GHz (75.17%) - 2,197,289 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.96%) - 4,528,950 stalled-cycles-backend:u # 0.58% backend cycles idle (74.26%) - 1,362,708,003 instructions:u # 1.74 insn per cycle - # 0.00 stalled cycles per insn (74.60%) - 0.381588531 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/runTest.exe + 2,019,688,314 cycles # 2.847 GHz + 2,894,885,573 instructions # 1.43 insn per cycle + 0.766503916 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 4.313524e+00 -Avg ME (F77/GPU) = 4.3135525460820645 -Relative difference = 6.617809954082434e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695463908836 +Relative difference = 4.162439020000051e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.695325e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.769333e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.769333e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 4.041029 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.023013e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.083107e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.083107e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 5.272574 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 13,837,051,782 cycles:u # 3.417 GHz (74.88%) - 17,812,024 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.92%) - 460,256,248 stalled-cycles-backend:u # 3.33% backend cycles idle (74.92%) - 41,450,204,858 instructions:u # 3.00 insn per cycle - # 0.01 stalled cycles per insn (74.99%) - 4.054379804 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 555) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest.exe + 15,334,092,326 cycles # 2.907 GHz + 42,471,073,635 instructions # 2.77 insn per cycle + 5.277803364 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 559) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135737563716248 -Relative difference = 5.647947044645654e-08 +Avg ME (F77/C++) = 4.3135739491553977 +Relative difference = 1.1787117204016727e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 
-EvtsPerSec[Rmb+ME] (23) = ( 6.985228e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.519730e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.519730e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 1.650076 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.212204e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.665805e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.665805e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.100750 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,523,168,879 cycles:u # 3.330 GHz (74.94%) - 12,695,760 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.92%) - 1,257,259,572 stalled-cycles-backend:u # 22.76% backend cycles idle (74.92%) - 16,074,513,667 instructions:u # 2.91 insn per cycle - # 0.08 stalled cycles per insn (74.94%) - 1.662951394 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2557) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest.exe + 6,119,971,132 cycles # 2.907 GHz + 16,261,719,943 instructions # 2.66 insn per cycle + 2.106003651 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2702) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313573e+00 -Avg ME (F77/C++) = 4.3135733148083091 -Relative difference = 7.298086973342306e-08 +Avg ME (C++/C++) = 4.313572e+00 +Avg ME (F77/C++) = 4.3135722205042839 +Relative difference = 5.111872113533787e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc 
--all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 8.429409e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.164470e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.164470e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.289198e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.392686 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.593790e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.292755e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.292755e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.679487 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,615,911,149 cycles:u # 3.294 GHz (74.95%) - 9,345,938 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.89%) - 1,844,391,987 stalled-cycles-backend:u # 39.96% backend cycles idle (74.89%) - 10,105,850,224 instructions:u # 2.19 insn per cycle - # 0.18 stalled cycles per insn (74.90%) - 1.405788920 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3914) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest.exe + 4,592,127,675 cycles # 2.728 GHz + 9,041,761,961 instructions # 1.97 insn per cycle + 1.684724917 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3558) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135650896001607 -Relative difference = 2.0771719231865954e-08 +Avg ME (F77/C++) = 4.3135645687580109 +Relative difference = 9.997345323075056e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.878026e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.633469e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 7.633469e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.612554 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 4,407,162,057 cycles # 2.725 GHz + 8,532,871,968 instructions # 1.94 insn per cycle + 1.617874278 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3311) (512y: 10) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645687580109 +Relative difference = 9.997345323075056e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.270771e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.879084e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.879084e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.760317 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,305,853,448 cycles # 1.874 GHz + 5,956,992,727 instructions # 1.80 insn per cycle + 1.765293566 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2357) (512y: 32) (512z: 2014) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313564e+00 +Avg ME (F77/C++) = 4.3135643783025444 +Relative difference = 8.770069111236825e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 6fca09810a..80813ef825 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,105 +1,102 @@ 
-Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_16:45:09 +DATE: 2024-05-15_09:39:45 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = 
SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.555583e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.455825e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497430e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.382816 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.601305e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.088563e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185234e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.525521 sec INFO: No Floating Point Exceptions have been reported - 928,372,758 cycles:u # 2.270 GHz (75.49%) - 2,253,270 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.70%) - 5,952,343 stalled-cycles-backend:u # 0.64% backend cycles idle (74.88%) - 1,478,001,511 instructions:u # 1.59 insn per cycle - # 0.00 stalled cycles per insn (74.89%) - 0.437350694 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/runTest.exe + 2,168,258,905 cycles # 2.853 GHz + 3,138,873,540 instructions # 1.45 insn per cycle + 0.816588080 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.308780e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.363378e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.363378e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.728958 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.766882e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.810773e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.810773e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.045671 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 16,171,155,664 cycles:u # 3.411 GHz (74.92%) - 38,149,325 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.93%) - 2,405,972,446 stalled-cycles-backend:u # 14.88% backend cycles idle (75.01%) - 41,551,815,411 instructions:u # 2.57 insn per cycle - # 0.06 stalled cycles per insn (75.03%) - 4.746198211 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 635) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest.exe + 17,574,296,306 cycles # 2.905 GHz + 41,766,230,605 instructions # 2.38 insn per cycle + 6.051094127 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -107,32 +104,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.244198e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 4.448974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.448974e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.656590 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.044146e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.188152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.188152e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.555742 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 8,978,630,703 cycles:u # 3.363 GHz (74.87%) - 15,441,593 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.83%) - 1,827,739,952 stalled-cycles-backend:u # 20.36% backend cycles idle (74.93%) - 25,970,891,941 instructions:u # 2.89 insn per cycle - # 0.07 stalled cycles per insn (75.08%) - 2.673866298 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2393) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest.exe + 10,147,045,588 cycles # 2.850 GHz + 26,353,270,775 instructions # 2.60 insn per cycle + 3.561135432 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2438) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -140,32 +134,59 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.363289e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 7.903132e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.903132e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.612163 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.562224e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.889108e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.889108e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.410975 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,505,852,527 cycles # 2.694 GHz + 12,121,176,067 instructions # 1.86 insn per cycle + 2.416418980 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2718) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.021202e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.409456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.409456e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.199331 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,294,121,423 cycles:u # 3.258 GHz (74.95%) - 18,540,101 stalled-cycles-frontend:u # 0.35% frontend cycles idle (74.90%) - 1,333,246,582 stalled-cycles-backend:u # 25.18% backend cycles idle (74.91%) - 12,289,556,764 instructions:u # 2.32 insn per cycle - # 0.11 stalled cycles per insn (74.91%) - 1.629350250 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2788) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest.exe + 6,010,625,125 cycles # 2.728 GHz + 11,226,506,585 instructions # 1.87 insn per cycle + 2.204814427 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2369) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -173,9 +194,35 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.252088e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.409014e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.409014e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 
+- 1.609110e-01 ) GeV^0 +TOTAL : 3.335937 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,078,181,761 cycles # 1.820 GHz + 8,214,656,148 instructions # 1.35 insn per cycle + 3.341517301 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1787) (512y: 134) (512z: 1755) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index 2e6d2d9565..58967d5980 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,105 +1,102 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building 
in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_16:45:24 +DATE: 2024-05-15_09:40:11 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = 
SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.543953e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.380252e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.419621e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.380255 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.609019e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.095172e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.191696e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.528086 sec INFO: No Floating Point Exceptions have been reported - 914,534,531 cycles:u # 2.249 GHz (74.00%) - 2,062,532 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.34%) - 5,541,049 stalled-cycles-backend:u # 0.61% backend cycles idle (74.67%) - 1,426,424,976 instructions:u # 1.56 insn per cycle - # 0.00 stalled cycles per insn (76.24%) - 0.436887536 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/runTest.exe + 2,169,297,309 cycles # 2.846 GHz + 3,090,623,102 instructions # 1.42 insn per cycle + 0.819052248 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.297575e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.351788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.351788e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.749421 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.775858e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.820247e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.820247e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.014989 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 16,188,336,443 cycles:u # 3.399 GHz (74.99%) - 32,936,332 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.97%) - 56,339,433 stalled-cycles-backend:u # 0.35% backend cycles idle (74.97%) - 42,758,696,712 instructions:u # 2.64 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 4.766796730 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 577) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest.exe + 17,468,050,475 cycles # 2.902 GHz + 43,049,777,107 instructions # 2.46 insn per cycle + 6.020366459 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 651) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -107,32 +104,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.205143e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 4.404626e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.404626e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.678632 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.202203e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.362005e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.362005e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.384891 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,037,296,715 cycles:u # 3.357 GHz (74.97%) - 15,503,386 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.04%) - 1,245,986,582 stalled-cycles-backend:u # 13.79% backend cycles idle (75.04%) - 24,980,136,778 instructions:u # 2.76 insn per cycle - # 0.05 stalled cycles per insn (75.04%) - 2.695735212 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2226) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest.exe + 9,778,733,081 cycles # 2.885 GHz + 25,167,358,615 instructions # 2.57 insn per cycle + 3.390286796 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2276) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -140,32 +134,59 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.402147e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 6.813045e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.813045e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.823690 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.282641e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.561981e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.561981e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.558618 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 7,005,125,692 cycles # 2.733 GHz + 12,790,366,975 instructions # 1.83 insn per cycle + 2.564023768 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2699) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.605710e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.928534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.928534e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.386989 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,060,962,198 cycles:u # 3.300 GHz (74.81%) - 18,276,032 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.03%) - 697,613,701 stalled-cycles-backend:u # 11.51% backend cycles idle (75.17%) - 13,609,061,030 instructions:u # 2.25 insn per cycle - # 0.05 stalled cycles per insn (75.17%) - 1.840736491 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2930) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest.exe + 6,538,720,575 cycles # 2.734 GHz + 12,108,614,885 instructions # 1.85 insn per cycle + 2.392368081 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2351) (512y: 227) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -173,9 +194,35 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.076149e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.216751e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.216751e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 
+- 1.609110e-01 ) GeV^0 +TOTAL : 3.520703 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,385,528,551 cycles # 1.811 GHz + 8,984,473,699 instructions # 1.41 insn per cycle + 3.526309284 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1892) (512y: 178) (512z: 2083) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 88c906df0a..bc7d09f4c2 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx 
-BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_16:43:44 +DATE: 2024-05-15_09:37:13 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.571380e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.274438e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.280380e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.397939 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.202067e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.226429e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.231093e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.468626 sec INFO: No Floating Point Exceptions have been reported - 1,042,961,084 cycles:u # 2.645 GHz (72.61%) - 2,254,086 stalled-cycles-frontend:u # 0.22% frontend cycles idle (73.57%) - 5,624,084 stalled-cycles-backend:u # 0.54% backend cycles idle (75.57%) - 1,460,383,473 instructions:u # 1.40 insn per cycle - # 0.00 stalled cycles per insn (75.70%) - 0.444657235 seconds time elapsed + 1,953,008,185 cycles # 2.841 GHz + 2,842,111,675 instructions # 1.46 insn per cycle + 0.746379585 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.637055e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.860714e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.862696e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.446265 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.845433e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.983223e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.992360e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.483498 sec INFO: No Floating Point Exceptions have been reported - 1,096,603,690 cycles:u # 2.605 GHz (74.76%) - 2,298,126 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.32%) - 5,734,466 stalled-cycles-backend:u # 0.52% backend cycles idle (75.32%) - 1,520,111,994 instructions:u # 1.39 insn per cycle - # 0.00 stalled cycles per insn (75.47%) - 0.492332333 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/runTest.exe + 2,009,105,725 cycles # 2.845 GHz + 2,994,395,706 instructions # 1.49 insn per 
cycle + 0.764235106 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176587E-006 -Relative difference = 3.3392753387325367e-07 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.055534e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.059258e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.059258e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.136200 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.341006e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.344191e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.344191e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.163389 sec INFO: No Floating Point Exceptions have been reported - 469,638,655 cycles:u # 3.358 GHz (72.85%) - 305,767 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.72%) - 73,244,886 stalled-cycles-backend:u # 15.60% backend cycles idle (77.13%) - 1,384,227,102 instructions:u # 2.95 insn per cycle - # 0.05 stalled cycles per insn (77.13%) - 0.143889231 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1926) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest.exe + 476,471,724 cycles # 2.858 GHz + 1,396,834,040 instructions # 2.93 insn per cycle + 0.167338927 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3991) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest.exe 
[ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860167168E-006 -Relative difference = 3.3392764976441195e-07 +Avg ME (F77/C++) = 8.1274562860167185E-006 +Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.104355e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 9.122659e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.122659e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.063783 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.360618e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.372071e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.372071e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.088055 sec INFO: No Floating Point Exceptions have been reported - 221,158,312 cycles:u # 3.286 GHz (72.89%) - 91,517 stalled-cycles-frontend:u # 0.04% frontend cycles idle (76.27%) - 21,802,614 stalled-cycles-backend:u # 9.86% backend cycles idle (76.25%) - 658,008,213 instructions:u # 2.98 insn per cycle - # 0.03 stalled cycles per insn (76.25%) - 0.071927257 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 9270) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest.exe + 246,212,528 cycles # 2.691 GHz + 699,164,769 instructions # 2.84 insn per cycle + 0.092128091 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 9501) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.092186e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.101709e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.101709e+04 ) sec^-1 
-MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.031010 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.397376e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.402958e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.402958e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.042872 sec +INFO: No Floating Point Exceptions have been reported + 121,563,038 cycles # 2.617 GHz + 260,039,900 instructions # 2.14 insn per cycle + 0.046942965 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8227) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.633036e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.640418e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.640418e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037445 sec INFO: No Floating Point Exceptions have been reported - 90,316,893 cycles:u # 2.613 GHz (76.96%) - 123,588 stalled-cycles-frontend:u # 0.14% frontend cycles idle (76.90%) - 11,871,527 stalled-cycles-backend:u # 13.14% backend cycles idle (76.90%) - 231,305,307 instructions:u # 2.56 insn per cycle - # 0.05 stalled cycles per insn (76.90%) - 0.038968170 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8240) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest.exe + 108,374,861 cycles # 2.658 GHz + 240,137,018 instructions # 2.22 insn per cycle + 0.041385204 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 7348) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.199506e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204802e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204802e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.049113 sec +INFO: No Floating Point Exceptions have been reported + 
96,354,524 cycles # 1.837 GHz + 138,443,095 instructions # 1.44 insn per cycle + 0.053009261 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1692) (512y: 126) (512z: 6592) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index e73b1c13cb..a46b9e59f6 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_16:43:50 +DATE: 2024-05-15_09:37:23 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.397596e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.037256e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.048842e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.373237 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.243744e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.268803e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.272916e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.468608 sec INFO: No Floating Point Exceptions have been reported - 996,488,380 cycles:u # 2.539 GHz (73.77%) - 2,078,558 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.45%) - 5,393,212 stalled-cycles-backend:u # 0.54% backend cycles idle (75.57%) - 1,406,194,147 instructions:u # 1.41 insn per cycle - # 0.00 stalled cycles per insn (75.46%) - 0.418570908 seconds time elapsed + 1,942,160,737 cycles # 2.818 GHz + 2,807,580,866 instructions # 1.45 insn per cycle + 0.748749095 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.636419e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.866260e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.868188e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.398664 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.939505e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.078673e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.089031e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.486285 sec INFO: No Floating Point Exceptions have been reported - 1,054,749,544 cycles:u # 2.525 GHz (75.23%) - 2,235,164 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.15%) - 5,745,937 stalled-cycles-backend:u # 0.54% backend cycles idle (75.06%) - 1,536,892,171 instructions:u # 1.46 insn per cycle - # 0.00 stalled cycles per insn (75.07%) - 0.444437629 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/runTest.exe + 1,994,044,298 cycles # 2.816 GHz + 2,960,112,232 instructions # 1.48 insn per 
cycle + 0.766502280 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176587E-006 -Relative difference = 3.3392753387325367e-07 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.046938e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.050659e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.050659e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.135474 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.338112e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.341354e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.341354e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.162906 sec INFO: No Floating Point Exceptions have been reported - 467,381,959 cycles:u # 3.358 GHz (71.77%) - 243,713 stalled-cycles-frontend:u # 0.05% frontend cycles idle (72.79%) - 63,791,087 stalled-cycles-backend:u # 13.65% backend cycles idle (75.68%) - 1,382,666,807 instructions:u # 2.96 insn per cycle - # 0.05 stalled cycles per insn (77.02%) - 0.143497691 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1902) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest.exe + 472,809,594 cycles # 2.841 GHz + 1,391,971,221 instructions # 2.94 insn per cycle + 0.167023132 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3869) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest.exe 
[ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860167168E-006 -Relative difference = 3.3392764976441195e-07 +Avg ME (F77/C++) = 8.1274562860167185E-006 +Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.203739e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 9.221862e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.221862e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.062385 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.300367e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.312721e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.312721e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.088323 sec INFO: No Floating Point Exceptions have been reported - 229,383,102 cycles:u # 3.477 GHz (72.84%) - 60,907 stalled-cycles-frontend:u # 0.03% frontend cycles idle (76.09%) - 26,884,644 stalled-cycles-backend:u # 11.72% backend cycles idle (75.77%) - 657,622,182 instructions:u # 2.87 insn per cycle - # 0.04 stalled cycles per insn (75.77%) - 0.069942425 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 9325) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest.exe + 245,693,132 cycles # 2.673 GHz + 695,206,213 instructions # 2.83 insn per cycle + 0.092508234 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 9537) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.064693e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.073925e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.073925e+04 ) sec^-1 
-MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.030580 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.397203e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.402750e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.402750e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.042230 sec +INFO: No Floating Point Exceptions have been reported + 120,165,191 cycles # 2.630 GHz + 255,644,551 instructions # 2.13 insn per cycle + 0.046411879 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8181) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.585636e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.592927e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.592927e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037822 sec INFO: No Floating Point Exceptions have been reported - 119,094,038 cycles:u # 3.487 GHz (70.73%) - 108,700 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.15%) - 12,753,984 stalled-cycles-backend:u # 10.71% backend cycles idle (76.62%) - 227,488,187 instructions:u # 1.91 insn per cycle - # 0.06 stalled cycles per insn (76.62%) - 0.039136656 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8196) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest.exe + 108,310,856 cycles # 2.609 GHz + 235,872,181 instructions # 2.18 insn per cycle + 0.042148151 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 7301) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.168091e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.173496e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.173496e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.049868 sec +INFO: No Floating Point Exceptions have been reported + 
95,370,186 cycles # 1.788 GHz + 133,947,611 instructions # 1.40 insn per cycle + 0.053920194 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1641) (512y: 126) (512z: 6597) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 893d812320..be11ef2861 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_17:28:05 +DATE: 2024-05-15_09:37:33 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.560113e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.781566e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.784057e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.100300e-04 +- 2.256635e-04 ) GeV^-4 -TOTAL : 0.704020 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.538093e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.550084e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.553430e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 +TOTAL : 0.468250 sec INFO: No Floating Point Exceptions have been reported - 880,874,724 cycles:u # 0.979 GHz (74.04%) - 2,147,177 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.75%) - 5,547,190 stalled-cycles-backend:u # 0.63% backend cycles idle (74.61%) - 1,325,567,626 instructions:u # 1.50 insn per cycle - # 0.00 stalled cycles per insn (76.31%) - 1.282830870 seconds time elapsed + 1,976,256,669 cycles # 2.846 GHz + 2,862,455,030 instructions # 1.45 insn per cycle + 0.751246171 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.245493e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.054672e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.062488e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.017784e-02 +- 5.681015e-02 ) GeV^-4 -TOTAL : 0.343683 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.618726e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.730175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.743445e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.020493e-03 +- 4.025604e-03 ) GeV^-4 +TOTAL : 0.470462 sec INFO: No Floating Point Exceptions have been reported - 894,309,134 cycles:u # 2.418 GHz (74.08%) - 2,093,905 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.07%) - 5,318,017 stalled-cycles-backend:u # 0.59% backend cycles idle (74.98%) - 1,318,810,937 instructions:u # 1.47 insn per cycle - # 0.00 stalled cycles per insn (76.05%) - 0.392355820 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/runTest.exe + 1,945,323,505 cycles # 2.835 GHz + 2,854,909,440 instructions # 1.47 insn per 
cycle + 0.744046605 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 8.127320e-06 -Avg ME (F77/GPU) = 8.1275379236374627E-006 -Relative difference = 2.681371441780168e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272870954487585E-006 +Relative difference = 4.564329725014175e-06 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.487502e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.491674e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.491674e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.130349 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.493617e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.497008e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497008e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.156308 sec INFO: No Floating Point Exceptions have been reported - 425,670,365 cycles:u # 3.266 GHz (74.18%) - 93,417 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.47%) - 52,087,766 stalled-cycles-backend:u # 12.24% backend cycles idle (75.47%) - 1,281,047,178 instructions:u # 3.01 insn per cycle - # 0.04 stalled cycles per insn (75.47%) - 0.138010594 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1611) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest.exe + 460,938,113 cycles # 2.887 GHz + 1,393,426,941 instructions # 3.02 insn per cycle + 0.160226520 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3070) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 8.127810e-06 -Avg ME (F77/C++) = 8.1278100323291073E-006 -Relative difference = 3.977591502689147e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127811e-06 +Avg ME (F77/C++) = 8.1278105211728276E-006 +Relative difference = 5.891219330978222e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.744919e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.751382e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.751382e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.035846 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.209351e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.214237e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.214237e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.048337 sec INFO: No Floating Point Exceptions have been reported - 116,936,791 cycles:u # 2.993 GHz (71.07%) - 85,599 stalled-cycles-frontend:u # 0.07% frontend cycles idle (79.55%) - 17,084,457 stalled-cycles-backend:u # 14.61% backend cycles idle (79.55%) - 343,234,317 instructions:u # 2.94 insn per cycle - # 0.05 stalled cycles per insn (79.55%) - 0.042363924 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 9799) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest.exe + 137,973,328 cycles # 2.667 GHz + 375,722,004 instructions # 2.72 insn per cycle + 0.052233489 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:10134) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 8.127807e-06 -Avg ME (F77/C++) = 8.1278071680283782E-006 -Relative difference = 2.0673273707686565e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127809e-06 +Avg ME (F77/C++) = 8.1278090510674588E-006 +Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.078259e+04 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.113101e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.113101e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.018458 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.738682e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.760820e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.760820e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.024033 sec +INFO: No Floating Point Exceptions have been reported + 71,841,958 cycles # 2.624 GHz + 146,656,247 instructions # 2.04 insn per cycle + 0.027862943 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8933) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.147936e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.177477e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.177477e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.021655 sec INFO: No Floating Point Exceptions have been reported - 51,426,519 cycles:u # 2.372 GHz (54.23%) - 85,158 stalled-cycles-frontend:u # 0.17% frontend cycles idle (63.18%) - 5,142,747 stalled-cycles-backend:u # 10.00% backend cycles idle (63.06%) - 161,517,247 instructions:u # 3.14 insn per cycle - # 0.03 stalled cycles per insn (65.21%) - 0.024774594 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8980) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest.exe + 66,095,413 cycles # 2.653 GHz + 136,520,360 instructions # 2.07 insn per cycle + 0.025536275 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8164) (512y: 28) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 8.127535e-06 -Avg ME (F77/C++) = 8.1275352476332691E-006 -Relative difference = 3.04684346075092e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.342092e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.362212e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.362212e+04 ) sec^-1 +MeanMatrixElemValue = ( 
7.165747e-04 +- 6.542824e-04 ) GeV^-4 +TOTAL : 0.027664 sec +INFO: No Floating Point Exceptions have been reported + 59,063,912 cycles # 1.907 GHz + 85,174,422 instructions # 1.44 insn per cycle + 0.031611330 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2572) (512y: 32) (512z: 6935) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275369863475849E-006 +Relative difference = 1.6797726498700304e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index a9e0aa7328..5c9bac71df 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y 
(was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_17:35:17 +DATE: 2024-05-15_09:37:44 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.506626e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.731000e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.733224e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.100300e-04 +- 2.256635e-04 ) GeV^-4 -TOTAL : 0.662830 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.566284e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.577247e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.580384e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 +TOTAL : 0.467516 sec INFO: No Floating Point Exceptions have been reported - 841,102,010 cycles:u # 0.952 GHz (74.79%) - 2,443,961 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.15%) - 9,715,686 stalled-cycles-backend:u # 1.16% backend cycles idle (74.73%) - 1,393,812,582 instructions:u # 1.66 insn per cycle - # 0.01 stalled cycles per insn (74.73%) - 1.431059788 seconds time elapsed + 1,965,266,310 cycles # 2.854 GHz + 2,875,934,182 instructions # 1.46 insn per cycle + 0.745802230 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.112275e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.921648e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.928652e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.017784e-02 +- 5.681015e-02 ) GeV^-4 -TOTAL : 0.341706 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.913393e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.003584e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005156e+06 ) sec^-1 +MeanMatrixElemValue = ( 8.020495e-03 +- 4.025606e-03 ) GeV^-4 +TOTAL : 0.469176 sec INFO: No Floating Point Exceptions have been reported - 887,566,745 cycles:u # 2.451 GHz (73.80%) - 2,141,582 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.53%) - 5,143,326 stalled-cycles-backend:u # 0.58% backend cycles idle (75.73%) - 1,324,273,556 instructions:u # 1.49 insn per cycle - # 0.00 stalled cycles per insn (76.07%) - 0.387872508 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/runTest.exe + 1,950,555,056 cycles # 2.845 GHz + 2,785,891,797 instructions # 1.43 insn per 
cycle + 0.743443978 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 8.127320e-06 -Avg ME (F77/GPU) = 8.1275379236391975E-006 -Relative difference = 2.681371463124516e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272870252982758E-006 +Relative difference = 4.555698209723637e-06 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.488556e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.492713e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.492713e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.129216 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.466728e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.470173e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.470173e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.156828 sec INFO: No Floating Point Exceptions have been reported - 407,181,652 cycles:u # 3.136 GHz (75.38%) - 88,691 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.38%) - 54,009,549 stalled-cycles-backend:u # 13.26% backend cycles idle (75.38%) - 1,287,120,920 instructions:u # 3.16 insn per cycle - # 0.04 stalled cycles per insn (75.36%) - 0.137386430 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1591) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest.exe + 459,335,817 cycles # 2.869 GHz + 1,388,443,506 instructions # 3.02 insn per cycle + 0.160735073 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2959) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 8.127810e-06 -Avg ME (F77/C++) = 8.1278100323291073E-006 -Relative difference = 3.977591502689147e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127811e-06 +Avg ME (F77/C++) = 8.1278105211728276E-006 +Relative difference = 5.891219330978222e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.738379e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.744914e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.744914e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.035335 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.220593e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.225034e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.225034e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.047637 sec INFO: No Floating Point Exceptions have been reported - 133,748,688 cycles:u # 3.469 GHz (63.03%) - 11,172 stalled-cycles-frontend:u # 0.01% frontend cycles idle (68.73%) - 17,933,333 stalled-cycles-backend:u # 13.41% backend cycles idle (79.28%) - 337,611,906 instructions:u # 2.52 insn per cycle - # 0.05 stalled cycles per insn (79.28%) - 0.041878350 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 9782) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest.exe + 136,945,111 cycles # 2.688 GHz + 371,014,842 instructions # 2.71 insn per cycle + 0.051574317 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:10117) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 8.127807e-06 -Avg ME (F77/C++) = 8.1278071680283782E-006 -Relative difference = 2.0673273707686565e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127809e-06 +Avg ME (F77/C++) = 8.1278090510674588E-006 +Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.065045e+04 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.100324e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.100324e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.017937 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.683853e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.705729e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.705729e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.023981 sec +INFO: No Floating Point Exceptions have been reported + 70,282,423 cycles # 2.572 GHz + 141,956,175 instructions # 2.02 insn per cycle + 0.027873689 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8887) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.113105e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.146058e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.146058e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.021054 sec INFO: No Floating Point Exceptions have been reported - 37,451,256 cycles:u # 1.775 GHz (62.17%) - 86,031 stalled-cycles-frontend:u # 0.23% frontend cycles idle (62.04%) - 8,141,981 stalled-cycles-backend:u # 21.74% backend cycles idle (61.89%) - 159,221,307 instructions:u # 4.25 insn per cycle - # 0.05 stalled cycles per insn (80.94%) - 0.024327932 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8934) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest.exe + 63,964,669 cycles # 2.622 GHz + 131,721,900 instructions # 2.06 insn per cycle + 0.024943326 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8117) (512y: 28) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 8.127535e-06 -Avg ME (F77/C++) = 8.1275352476332691E-006 -Relative difference = 3.04684346075092e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.362624e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.384309e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.384309e+04 ) sec^-1 +MeanMatrixElemValue = ( 
7.165747e-04 +- 6.542824e-04 ) GeV^-4 +TOTAL : 0.026667 sec +INFO: No Floating Point Exceptions have been reported + 57,079,965 cycles # 1.898 GHz + 80,394,091 instructions # 1.41 insn per cycle + 0.030711995 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2521) (512y: 32) (512z: 6939) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275369863475849E-006 +Relative difference = 1.6797726498700304e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 2529d656d6..e734cd7c84 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y 
(was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_16:44:04 +DATE: 2024-05-15_09:37:54 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.185953e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.865624e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.868687e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.375613 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.175427e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.197973e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.201962e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.467363 sec INFO: No Floating Point Exceptions have been reported - 996,156,178 cycles:u # 2.505 GHz (74.91%) - 2,179,510 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.89%) - 5,885,621 stalled-cycles-backend:u # 0.59% backend cycles idle (75.91%) - 1,405,535,472 instructions:u # 1.41 insn per cycle - # 0.00 stalled cycles per insn (75.88%) - 0.423052294 seconds time elapsed + 1,944,433,279 cycles # 2.832 GHz + 2,794,631,115 instructions # 1.44 insn per cycle + 0.744919527 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.667341e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.862376e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.864834e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.400383 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.821650e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.958980e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.968240e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.483179 sec INFO: No Floating Point Exceptions have been reported - 1,097,760,192 cycles:u # 2.596 GHz (74.90%) - 2,197,550 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.43%) - 5,522,644 stalled-cycles-backend:u # 0.50% backend cycles idle (75.43%) - 1,523,723,948 instructions:u # 1.39 insn per cycle - # 0.00 stalled cycles per insn (75.13%) - 0.445687936 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/runTest.exe + 2,015,415,638 cycles # 2.820 GHz + 2,968,884,378 instructions # 1.47 insn per 
cycle + 0.771332872 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405183E-006 -Relative difference = 3.336909458255062e-07 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.077506e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.080985e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.080985e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.135125 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.315942e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.319079e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.319079e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.164432 sec INFO: No Floating Point Exceptions have been reported - 471,766,950 cycles:u # 3.409 GHz (73.17%) - 109,243 stalled-cycles-frontend:u # 0.02% frontend cycles idle (76.08%) - 67,422,540 stalled-cycles-backend:u # 14.29% backend cycles idle (76.89%) - 1,398,845,419 instructions:u # 2.97 insn per cycle - # 0.05 stalled cycles per insn (76.89%) - 0.141858850 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1922) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest.exe + 477,932,083 cycles # 2.852 GHz + 1,405,300,949 instructions # 2.94 insn per cycle + 0.168321285 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3977) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest.exe [ 
PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.209901e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.228330e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.228330e+03 
) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.063049 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.596291e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.608589e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.608589e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.084924 sec INFO: No Floating Point Exceptions have been reported - 225,759,845 cycles:u # 3.383 GHz (72.64%) - 69,036 stalled-cycles-frontend:u # 0.03% frontend cycles idle (76.15%) - 22,005,399 stalled-cycles-backend:u # 9.75% backend cycles idle (76.05%) - 653,313,185 instructions:u # 2.89 insn per cycle - # 0.03 stalled cycles per insn (76.05%) - 0.069812764 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 9115) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest.exe + 242,704,238 cycles # 2.749 GHz + 690,951,648 instructions # 2.85 insn per cycle + 0.088916082 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 9324) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.099127e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.108206e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.108206e+04 ) sec^-1 
-MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.030729 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.385512e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.391544e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.391544e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.043079 sec +INFO: No Floating Point Exceptions have been reported + 119,944,572 cycles # 2.584 GHz + 257,870,362 instructions # 2.15 insn per cycle + 0.047077076 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8244) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.610632e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.619218e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.619218e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037689 sec INFO: No Floating Point Exceptions have been reported - 100,785,402 cycles:u # 2.976 GHz (70.80%) - 100,573 stalled-cycles-frontend:u # 0.10% frontend cycles idle (76.41%) - 11,158,072 stalled-cycles-backend:u # 11.07% backend cycles idle (76.41%) - 230,048,069 instructions:u # 2.28 insn per cycle - # 0.05 stalled cycles per insn (76.41%) - 0.037203826 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8195) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest.exe + 107,352,604 cycles # 2.621 GHz + 238,160,282 instructions # 2.22 insn per cycle + 0.041537470 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 7342) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.148363e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.153118e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.153118e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.051095 sec +INFO: No Floating Point Exceptions have been reported + 97,982,591 
cycles # 1.800 GHz + 139,316,915 instructions # 1.42 insn per cycle + 0.055005350 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1953) (512y: 122) (512z: 6323) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index a0ef7ed92e..1d6593cd77 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_16:44:10 +DATE: 2024-05-15_09:38:04 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.171905e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.718059e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.723486e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.372303 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.209377e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.234207e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.238140e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.464907 sec INFO: No Floating Point Exceptions have been reported - 1,032,364,763 cycles:u # 2.623 GHz (73.57%) - 2,107,596 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.49%) - 5,375,214 stalled-cycles-backend:u # 0.52% backend cycles idle (75.64%) - 1,465,343,007 instructions:u # 1.42 insn per cycle - # 0.00 stalled cycles per insn (75.65%) - 0.416772045 seconds time elapsed + 1,935,276,475 cycles # 2.820 GHz + 2,840,096,326 instructions # 1.47 insn per cycle + 0.743011197 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.675908e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.870343e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.872814e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.396176 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.925364e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.067051e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.077128e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.486874 sec INFO: No Floating Point Exceptions have been reported - 1,063,363,462 cycles:u # 2.553 GHz (75.05%) - 2,257,964 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.21%) - 5,834,320 stalled-cycles-backend:u # 0.55% backend cycles idle (75.36%) - 1,534,492,292 instructions:u # 1.44 insn per cycle - # 0.00 stalled cycles per insn (75.16%) - 0.437685435 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/runTest.exe + 1,995,098,280 cycles # 2.817 GHz + 2,948,453,060 instructions # 1.48 insn per 
cycle + 0.766894465 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405183E-006 -Relative difference = 3.336909458255062e-07 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.041187e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.044595e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.044595e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.135649 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.322818e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.325985e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.325985e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.163700 sec INFO: No Floating Point Exceptions have been reported - 472,699,488 cycles:u # 3.402 GHz (71.30%) - 418,182 stalled-cycles-frontend:u # 0.09% frontend cycles idle (73.66%) - 55,247,137 stalled-cycles-backend:u # 11.69% backend cycles idle (76.54%) - 1,395,574,098 instructions:u # 2.95 insn per cycle - # 0.04 stalled cycles per insn (76.98%) - 0.142378440 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1898) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest.exe + 476,501,052 cycles # 2.855 GHz + 1,400,760,181 instructions # 2.94 insn per cycle + 0.167774037 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3871) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest.exe [ 
PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.298475e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.316543e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.316543e+03 
) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.061721 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.565327e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.577506e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.577506e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.084744 sec INFO: No Floating Point Exceptions have been reported - 209,423,840 cycles:u # 3.228 GHz (72.60%) - 182,070 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.36%) - 23,040,744 stalled-cycles-backend:u # 11.00% backend cycles idle (75.36%) - 649,629,538 instructions:u # 3.10 insn per cycle - # 0.04 stalled cycles per insn (75.36%) - 0.068157982 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 9168) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest.exe + 241,875,702 cycles # 2.743 GHz + 687,449,657 instructions # 2.84 insn per cycle + 0.088833048 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 9365) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.122127e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.131047e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.131047e+04 ) sec^-1 
-MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.029826 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.428402e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.434217e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.434217e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.041218 sec +INFO: No Floating Point Exceptions have been reported + 117,891,142 cycles # 2.639 GHz + 253,477,844 instructions # 2.15 insn per cycle + 0.045185184 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8196) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.598792e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.606341e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.606341e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037394 sec INFO: No Floating Point Exceptions have been reported - 105,559,399 cycles:u # 3.200 GHz (74.06%) - 71,240 stalled-cycles-frontend:u # 0.07% frontend cycles idle (76.17%) - 11,419,080 stalled-cycles-backend:u # 10.82% backend cycles idle (75.79%) - 225,155,426 instructions:u # 2.13 insn per cycle - # 0.05 stalled cycles per insn (75.79%) - 0.036510289 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8148) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest.exe + 106,494,211 cycles # 2.610 GHz + 233,756,310 instructions # 2.20 insn per cycle + 0.041406370 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 7292) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.150507e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155231e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155231e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.050250 sec +INFO: No Floating Point Exceptions have been reported + 95,987,950 
cycles # 1.787 GHz + 134,827,619 instructions # 1.40 insn per cycle + 0.054216055 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1904) (512y: 122) (512z: 6323) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index c7f800787e..4a0cfb79ea 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building 
in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_16:43:02 +DATE: 2024-05-15_09:36:03 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.476371e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.343902e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.771863e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 -TOTAL : 0.350571 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.827605e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.737160e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.400698e+08 ) sec^-1 
+MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.517447 sec INFO: No Floating Point Exceptions have been reported - 786,304,797 cycles:u # 2.153 GHz (73.92%) - 2,116,401 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.89%) - 5,713,268 stalled-cycles-backend:u # 0.73% backend cycles idle (75.95%) - 1,300,001,206 instructions:u # 1.65 insn per cycle - # 0.00 stalled cycles per insn (76.12%) - 0.405219761 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/runTest.exe + 2,142,204,484 cycles # 2.853 GHz + 3,057,175,497 instructions # 1.43 insn per cycle + 0.808706740 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961386341935 -Relative difference = 2.0349321196791385e-07 +Avg ME (F77/GPU) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.173463e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.328976e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.328976e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 1.083150 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.781022e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.130898e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.130898e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.185103 sec INFO: No Floating Point Exceptions have been reported - 3,499,775,483 cycles:u # 3.190 GHz (74.72%) - 8,159,223 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.54%) - 9,448,324 stalled-cycles-backend:u # 0.27% backend cycles idle (74.88%) - 8,541,535,095 instructions:u # 2.44 insn per cycle - # 0.00 stalled cycles per insn (75.21%) - 1.100930856 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 422) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest.exe + 3,455,291,899 cycles # 2.905 GHz + 8,714,409,721 instructions # 2.52 insn per cycle + 1.190503621 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 458) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.166111e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.820275e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.820275e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 
) GeV^0 -TOTAL : 0.673909 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.557149e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.023679e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.023679e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.787167 sec INFO: No Floating Point Exceptions have been reported - 2,076,471,270 cycles:u # 3.022 GHz (74.43%) - 8,975,736 stalled-cycles-frontend:u # 0.43% frontend cycles idle (74.43%) - 13,351,309 stalled-cycles-backend:u # 0.64% backend cycles idle (74.99%) - 5,324,487,259 instructions:u # 2.56 insn per cycle - # 0.00 stalled cycles per insn (75.56%) - 0.690647397 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest.exe + 2,200,314,542 cycles # 2.778 GHz + 5,465,483,454 instructions # 2.48 insn per cycle + 0.792724066 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1298) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.504732e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.208368e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.208368e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) 
GeV^0 -TOTAL : 0.488940 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.319860e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.471499e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.471499e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.566271 sec INFO: No Floating Point Exceptions have been reported - 1,428,607,215 cycles:u # 2.842 GHz (73.80%) - 8,481,300 stalled-cycles-frontend:u # 0.59% frontend cycles idle (73.79%) - 8,503,092 stalled-cycles-backend:u # 0.60% backend cycles idle (74.67%) - 3,090,620,907 instructions:u # 2.16 insn per cycle - # 0.00 stalled cycles per insn (75.47%) - 0.506051388 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1429) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest.exe + 1,593,859,432 cycles # 2.791 GHz + 3,181,682,971 instructions # 2.00 insn per cycle + 0.571741699 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1459) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.403259e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.666143e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.666143e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.551298 sec +INFO: No Floating Point Exceptions have been reported + 1,554,857,410 cycles # 2.795 GHz + 
3,083,603,727 instructions # 1.98 insn per cycle + 0.556804001 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1274) (512y: 95) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.142477e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.082178e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.082178e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.605015 sec 
+INFO: No Floating Point Exceptions have been reported + 1,346,241,870 cycles # 2.207 GHz + 2,376,266,453 instructions # 1.77 insn per cycle + 0.610579465 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 584) (512y: 62) (512z: 953) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index ac566b37a5..6dd9e0ac51 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_16:43:09 +DATE: 2024-05-15_09:36:15 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.890074e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.965131e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.585286e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 -TOTAL : 0.337549 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.940650e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.318045e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.789707e+08 ) sec^-1 
+MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.518906 sec INFO: No Floating Point Exceptions have been reported - 761,489,259 cycles:u # 2.100 GHz (74.96%) - 2,205,568 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.85%) - 6,065,189 stalled-cycles-backend:u # 0.80% backend cycles idle (74.84%) - 1,348,029,695 instructions:u # 1.77 insn per cycle - # 0.00 stalled cycles per insn (75.93%) - 0.389753644 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/runTest.exe + 2,145,710,295 cycles # 2.853 GHz + 3,048,382,925 instructions # 1.42 insn per cycle + 0.810167134 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961386341935 -Relative difference = 2.0349321196791385e-07 +Avg ME (F77/GPU) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.182416e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.338716e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.338716e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 1.075977 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.840959e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.138397e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.138397e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.178485 sec INFO: No Floating Point Exceptions have been reported - 3,463,923,038 cycles:u # 3.180 GHz (74.92%) - 8,991,710 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.03%) - 17,487,568 stalled-cycles-backend:u # 0.50% backend cycles idle (75.03%) - 8,553,903,431 instructions:u # 2.47 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 1.094262453 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 356) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest.exe + 3,433,874,218 cycles # 2.901 GHz + 8,629,115,026 instructions # 2.51 insn per cycle + 1.184073779 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 403) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.162784e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.819432e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.819432e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 
) GeV^0 -TOTAL : 0.673987 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.605196e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.108330e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.108330e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.766522 sec INFO: No Floating Point Exceptions have been reported - 2,083,329,582 cycles:u # 3.034 GHz (74.44%) - 9,077,629 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.40%) - 18,145,043 stalled-cycles-backend:u # 0.87% backend cycles idle (74.94%) - 5,268,672,147 instructions:u # 2.53 insn per cycle - # 0.00 stalled cycles per insn (75.52%) - 0.690232746 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1165) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest.exe + 2,179,218,947 cycles # 2.825 GHz + 5,398,904,070 instructions # 2.48 insn per cycle + 0.772033522 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1258) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.496308e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.186579e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.186579e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) 
GeV^0 -TOTAL : 0.488787 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.311152e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.464438e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.464438e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.567182 sec INFO: No Floating Point Exceptions have been reported - 1,401,698,846 cycles:u # 2.796 GHz (74.69%) - 8,215,754 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.47%) - 16,938,722 stalled-cycles-backend:u # 1.21% backend cycles idle (74.52%) - 3,139,297,780 instructions:u # 2.24 insn per cycle - # 0.01 stalled cycles per insn (74.52%) - 0.504560606 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1373) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest.exe + 1,592,119,572 cycles # 2.783 GHz + 3,148,952,578 instructions # 1.98 insn per cycle + 0.572671512 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1386) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.409665e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.678412e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.678412e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.548491 sec +INFO: No Floating Point Exceptions have been reported + 1,543,347,926 cycles # 2.789 GHz + 
3,062,213,060 instructions # 1.98 insn per cycle + 0.553926856 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1220) (512y: 95) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.150474e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.084771e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.084771e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.600980 sec 
+INFO: No Floating Point Exceptions have been reported + 1,356,107,276 cycles # 2.238 GHz + 2,361,951,036 instructions # 1.74 insn per cycle + 0.606658090 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 557) (512y: 62) (512z: 944) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 03a0938477..5f4022aca0 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_16:43:16 +DATE: 2024-05-15_09:36:27 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.366550e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.323311e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.728963e+09 ) sec^-1 -MeanMatrixElemValue = ( 4.242352e-01 +- 1.228752e-04 ) GeV^0 -TOTAL : 0.326960 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.370405e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.208467e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.261700e+09 ) sec^-1 
+MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 +TOTAL : 0.479192 sec INFO: No Floating Point Exceptions have been reported - 764,972,013 cycles:u # 2.296 GHz (73.85%) - 2,176,685 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.57%) - 5,473,175 stalled-cycles-backend:u # 0.72% backend cycles idle (74.11%) - 1,374,260,207 instructions:u # 1.80 insn per cycle - # 0.00 stalled cycles per insn (74.56%) - 0.375723598 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/runTest.exe + 2,027,387,341 cycles # 2.850 GHz + 2,908,651,127 instructions # 1.43 insn per cycle + 0.768339555 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 72 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 4.232895e-01 -Avg ME (F77/GPU) = 0.42328966126660816 -Relative difference = 3.80984192091939e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 4.232893e-01 +Avg ME (F77/GPU) = 0.42328959883889183 +Relative difference = 7.059920764700599e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.312229e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.504727e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.504727e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 -TOTAL : 0.947967 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.802984e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.139381e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.139381e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.158709 sec INFO: No Floating Point Exceptions have been reported - 3,116,480,134 cycles:u # 3.260 GHz (75.07%) - 6,210,591 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.91%) - 7,102,976 stalled-cycles-backend:u # 0.23% backend cycles idle (74.90%) - 8,524,848,265 instructions:u # 2.74 insn per cycle - # 0.00 stalled cycles per insn (74.89%) - 0.959379324 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 516) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest.exe + 3,373,049,721 cycles # 2.900 GHz + 8,663,491,398 instructions # 2.57 insn per cycle + 1.163930937 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 464) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.375040e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.102188e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.102188e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) 
GeV^0 -TOTAL : 0.460604 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.387191e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.803778e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.803778e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.531005 sec INFO: No Floating Point Exceptions have been reported - 1,394,080,880 cycles:u # 2.969 GHz (74.96%) - 7,003,149 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.53%) - 6,087,454 stalled-cycles-backend:u # 0.44% backend cycles idle (74.45%) - 3,710,982,581 instructions:u # 2.66 insn per cycle - # 0.00 stalled cycles per insn (74.50%) - 0.473592012 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1393) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest.exe + 1,545,318,515 cycles # 2.885 GHz + 3,686,876,111 instructions # 2.39 insn per cycle + 0.536250836 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1472) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328960620216094 -Relative difference = 1.4652287586288606e-08 +Avg ME (F77/C++) = 0.42328960439772345 +Relative difference = 1.0389396439618597e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.664733e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.284601e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 8.284601e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 -TOTAL : 0.373733 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.111672e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.628157e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.628157e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.425576 sec +INFO: No Floating Point Exceptions have been reported + 1,209,256,462 cycles # 2.808 GHz + 2,425,469,042 instructions # 2.01 insn per cycle + 0.431202452 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1835) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232896e-01 +Avg ME (F77/C++) = 0.42328956670826301 +Relative difference = 7.865002347873079e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.178015e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.875142e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.875142e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.420058 sec INFO: No Floating Point Exceptions have been reported - 1,096,363,607 cycles:u # 2.870 GHz (75.63%) - 6,601,459 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.08%) - 22,320,721 stalled-cycles-backend:u # 2.04% backend cycles idle (74.88%) - 2,334,473,718 instructions:u # 2.13 insn per cycle - # 0.01 stalled cycles per insn (74.88%) - 0.386229589 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1807) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest.exe + 1,188,994,374 cycles # 2.799 GHz + 2,372,253,902 instructions # 2.00 insn per cycle + 0.425516144 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1716) (512y: 2) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328956839628518 -Relative difference = 7.466215756732981e-08 +Avg ME (F77/C++) = 0.42328956670826301 +Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.937549e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.030922e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.030922e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.449108 sec +INFO: No 
Floating Point Exceptions have been reported + 1,057,896,924 cycles # 2.331 GHz + 2,045,513,672 instructions # 1.93 insn per cycle + 0.454389124 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1125) (512y: 5) (512z: 1216) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232896e-01 +Avg ME (F77/C++) = 0.42328957567224279 +Relative difference = 5.7473080363015266e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index b13effeb6c..3d35345a2a 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand 
+HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_16:43:23 +DATE: 2024-05-15_09:36:38 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.323825e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319656e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.719868e+09 ) sec^-1 -MeanMatrixElemValue = ( 4.242352e-01 +- 1.228752e-04 ) GeV^0 -TOTAL : 0.308823 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.381885e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.216842e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.268913e+09 ) sec^-1 
+MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 +TOTAL : 0.478997 sec INFO: No Floating Point Exceptions have been reported - 704,306,697 cycles:u # 2.128 GHz (75.02%) - 2,137,974 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.50%) - 5,624,335 stalled-cycles-backend:u # 0.80% backend cycles idle (75.88%) - 1,274,415,055 instructions:u # 1.81 insn per cycle - # 0.00 stalled cycles per insn (76.11%) - 0.359510282 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/runTest.exe + 2,030,799,401 cycles # 2.843 GHz + 2,900,571,681 instructions # 1.43 insn per cycle + 0.770784907 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 71 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 4.232895e-01 -Avg ME (F77/GPU) = 0.42328966126660816 -Relative difference = 3.80984192091939e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 4.232893e-01 +Avg ME (F77/GPU) = 0.42328960436861962 +Relative difference = 7.190557844040413e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.317115e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.514597e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.514597e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 -TOTAL : 0.944202 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.919524e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.156224e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.156224e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.146189 sec INFO: No Floating Point Exceptions have been reported - 3,067,315,266 cycles:u # 3.220 GHz (74.81%) - 7,060,895 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.80%) - 8,136,724 stalled-cycles-backend:u # 0.27% backend cycles idle (74.83%) - 8,530,487,090 instructions:u # 2.78 insn per cycle - # 0.00 stalled cycles per insn (74.92%) - 0.955712297 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 379) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest.exe + 3,340,681,084 cycles # 2.903 GHz + 8,537,405,679 instructions # 2.56 insn per cycle + 1.151410707 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 372) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.371365e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.089598e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.089598e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) 
GeV^0 -TOTAL : 0.459025 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.328612e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.639360e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.639360e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.540599 sec INFO: No Floating Point Exceptions have been reported - 1,408,227,078 cycles:u # 3.016 GHz (74.30%) - 6,825,925 stalled-cycles-frontend:u # 0.48% frontend cycles idle (74.36%) - 9,607,425 stalled-cycles-backend:u # 0.68% backend cycles idle (74.79%) - 3,619,594,432 instructions:u # 2.57 insn per cycle - # 0.00 stalled cycles per insn (75.58%) - 0.471202314 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1365) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest.exe + 1,541,725,161 cycles # 2.828 GHz + 3,654,878,901 instructions # 2.37 insn per cycle + 0.545806836 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1417) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328960620216094 -Relative difference = 1.4652287586288606e-08 +Avg ME (F77/C++) = 0.42328960439772345 +Relative difference = 1.0389396439618597e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.694915e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.325213e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 8.325213e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 -TOTAL : 0.370157 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.095733e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.576377e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.576377e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.428129 sec +INFO: No Floating Point Exceptions have been reported + 1,212,577,396 cycles # 2.801 GHz + 2,409,931,564 instructions # 1.99 insn per cycle + 0.433541587 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1739) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232896e-01 +Avg ME (F77/C++) = 0.42328956670826301 +Relative difference = 7.865002347873079e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.197829e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.948312e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.948312e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.416935 sec INFO: No Floating Point Exceptions have been reported - 1,082,261,478 cycles:u # 2.863 GHz (74.64%) - 7,067,276 stalled-cycles-frontend:u # 0.65% frontend cycles idle (74.61%) - 13,677,966 stalled-cycles-backend:u # 1.26% backend cycles idle (74.60%) - 2,386,959,225 instructions:u # 2.21 insn per cycle - # 0.01 stalled cycles per insn (74.67%) - 0.381523204 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1722) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest.exe + 1,184,755,346 cycles # 2.810 GHz + 2,360,000,457 instructions # 1.99 insn per cycle + 0.422233667 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1639) (512y: 2) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328956839628518 -Relative difference = 7.466215756732981e-08 +Avg ME (F77/C++) = 0.42328956670826301 +Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.948970e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.098496e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.098496e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.445077 sec +INFO: No 
Floating Point Exceptions have been reported + 1,053,296,798 cycles # 2.342 GHz + 2,030,151,201 instructions # 1.93 insn per cycle + 0.450399180 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1038) (512y: 5) (512z: 1206) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232896e-01 +Avg ME (F77/C++) = 0.42328957567224279 +Relative difference = 5.7473080363015266e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 42c2c5fccc..df4c59c2d7 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand 
+HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_16:43:29 +DATE: 2024-05-15_09:36:49 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.404899e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.300485e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.791389e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 -TOTAL : 0.376624 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.836360e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.773080e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.372620e+08 ) sec^-1 
+MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.521460 sec INFO: No Floating Point Exceptions have been reported - 801,858,601 cycles:u # 2.176 GHz (71.98%) - 2,301,473 stalled-cycles-frontend:u # 0.29% frontend cycles idle (73.67%) - 5,597,215 stalled-cycles-backend:u # 0.70% backend cycles idle (74.62%) - 1,342,545,263 instructions:u # 1.67 insn per cycle - # 0.00 stalled cycles per insn (76.05%) - 0.432204588 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/runTest.exe + 2,141,147,741 cycles # 2.839 GHz + 3,055,126,214 instructions # 1.43 insn per cycle + 0.813180869 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961420809230 -Relative difference = 2.026789399531628e-07 +Avg ME (F77/GPU) = 0.42328961420809225 +Relative difference = 2.02678940084305e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.159586e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.310118e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.310118e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 1.093976 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.535385e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.099052e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.099052e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.214378 sec INFO: No Floating Point Exceptions have been reported - 3,499,257,249 cycles:u # 3.163 GHz (74.77%) - 8,141,037 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.72%) - 13,622,727 stalled-cycles-backend:u # 0.39% backend cycles idle (74.72%) - 8,671,149,251 instructions:u # 2.48 insn per cycle - # 0.00 stalled cycles per insn (74.86%) - 1.110653138 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 427) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest.exe + 3,512,565,133 cycles # 2.883 GHz + 8,782,234,865 instructions # 2.50 insn per cycle + 1.219810796 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.188052e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.865997e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.865997e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) 
GeV^0 -TOTAL : 0.668768 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.634092e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.156666e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.156666e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.755579 sec INFO: No Floating Point Exceptions have been reported - 2,047,248,570 cycles:u # 3.006 GHz (74.29%) - 8,783,902 stalled-cycles-frontend:u # 0.43% frontend cycles idle (74.69%) - 8,006,425 stalled-cycles-backend:u # 0.39% backend cycles idle (75.28%) - 5,269,378,246 instructions:u # 2.57 insn per cycle - # 0.00 stalled cycles per insn (75.33%) - 0.685132964 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1260) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest.exe + 2,158,444,599 cycles # 2.839 GHz + 5,462,042,613 instructions # 2.53 insn per cycle + 0.761050701 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1315) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.575206e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.372803e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.372803e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) 
GeV^0 -TOTAL : 0.482122 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.351677e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.533393e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.533393e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.560475 sec +INFO: No Floating Point Exceptions have been reported + 1,574,362,055 cycles # 2.784 GHz + 3,128,678,680 instructions # 1.99 insn per cycle + 0.566031332 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1508) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] 
(23) = ( 2.484188e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.839921e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.839921e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.536080 sec INFO: No Floating Point Exceptions have been reported - 1,408,748,553 cycles:u # 2.850 GHz (73.78%) - 8,148,579 stalled-cycles-frontend:u # 0.58% frontend cycles idle (74.12%) - 9,562,412 stalled-cycles-backend:u # 0.68% backend cycles idle (74.85%) - 3,008,594,405 instructions:u # 2.14 insn per cycle - # 0.00 stalled cycles per insn (75.66%) - 0.498542187 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest.exe + 1,508,778,976 cycles # 2.790 GHz + 2,980,181,539 instructions # 1.98 insn per cycle + 0.541495684 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1266) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328962559055894 -Relative difference = 1.757884518645067e-07 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.192468e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.184479e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.184479e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.593157 sec +INFO: No 
Floating Point Exceptions have been reported + 1,325,099,887 cycles # 2.215 GHz + 2,317,476,168 instructions # 1.75 insn per cycle + 0.598857501 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 708) (512y: 64) (512z: 1000) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index 420f0fe548..f455f2bbec 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand 
+HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_16:43:36 +DATE: 2024-05-15_09:37:01 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.139991e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.972399e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.594504e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 -TOTAL : 0.333772 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.920881e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.269245e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.735869e+08 ) sec^-1 
+MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.518131 sec INFO: No Floating Point Exceptions have been reported - 753,695,407 cycles:u # 2.103 GHz (74.79%) - 2,155,969 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.40%) - 5,408,780 stalled-cycles-backend:u # 0.72% backend cycles idle (75.66%) - 1,329,703,636 instructions:u # 1.76 insn per cycle - # 0.00 stalled cycles per insn (75.09%) - 0.387999054 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/runTest.exe + 2,139,648,032 cycles # 2.844 GHz + 3,068,084,951 instructions # 1.43 insn per cycle + 0.808990315 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961420809230 -Relative difference = 2.026789399531628e-07 +Avg ME (F77/GPU) = 0.42328961420809225 +Relative difference = 2.02678940084305e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.167517e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.320669e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.320669e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 1.085247 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.651516e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.113057e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113057e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.199334 sec INFO: No Floating Point Exceptions have been reported - 3,495,870,402 cycles:u # 3.185 GHz (74.54%) - 8,419,320 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.84%) - 13,814,541 stalled-cycles-backend:u # 0.40% backend cycles idle (75.20%) - 8,553,626,945 instructions:u # 2.45 insn per cycle - # 0.00 stalled cycles per insn (75.22%) - 1.101702386 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 358) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest.exe + 3,485,486,663 cycles # 2.895 GHz + 8,692,192,022 instructions # 2.49 insn per cycle + 1.204801600 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.208789e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.872230e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.872230e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) 
GeV^0 -TOTAL : 0.662705 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.605320e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.116395e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.116395e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.766826 sec INFO: No Floating Point Exceptions have been reported - 1,998,727,385 cycles:u # 2.962 GHz (75.01%) - 8,096,924 stalled-cycles-frontend:u # 0.41% frontend cycles idle (75.11%) - 15,605,174 stalled-cycles-backend:u # 0.78% backend cycles idle (75.10%) - 5,333,491,692 instructions:u # 2.67 insn per cycle - # 0.00 stalled cycles per insn (75.10%) - 0.678888304 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1221) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest.exe + 2,175,433,735 cycles # 2.822 GHz + 5,397,175,628 instructions # 2.48 insn per cycle + 0.772462095 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1286) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.583996e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.397080e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.397080e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) 
GeV^0 -TOTAL : 0.480459 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.361713e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.582957e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.582957e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.558306 sec +INFO: No Floating Point Exceptions have been reported + 1,566,391,037 cycles # 2.781 GHz + 3,096,526,527 instructions # 1.98 insn per cycle + 0.564013625 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1403) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] 
(23) = ( 2.482537e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.845149e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.845149e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.536318 sec INFO: No Floating Point Exceptions have been reported - 1,384,513,109 cycles:u # 2.811 GHz (74.26%) - 8,017,444 stalled-cycles-frontend:u # 0.58% frontend cycles idle (75.11%) - 16,159,141 stalled-cycles-backend:u # 1.17% backend cycles idle (75.64%) - 3,004,246,317 instructions:u # 2.17 insn per cycle - # 0.01 stalled cycles per insn (75.64%) - 0.496541853 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1430) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest.exe + 1,509,105,702 cycles # 2.789 GHz + 2,962,529,864 instructions # 1.96 insn per cycle + 0.541724501 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1207) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328962559055894 -Relative difference = 1.757884518645067e-07 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.120551e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.050745e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.050745e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.608424 sec +INFO: No 
Floating Point Exceptions have been reported + 1,350,605,417 cycles # 2.202 GHz + 2,301,674,581 instructions # 1.70 insn per cycle + 0.613832458 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 669) (512y: 64) (512z: 987) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 302ed78a64..5113c2293b 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:41:45 +DATE: 2024-05-15_09:33:48 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.832898e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.951894e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.006384e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 -TOTAL : 0.382081 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.741611e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169597e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279193e+08 ) sec^-1 +MeanMatrixElemValue = ( 
3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.530929 sec INFO: No Floating Point Exceptions have been reported - 877,304,933 cycles:u # 2.198 GHz (74.42%) - 2,115,937 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.86%) - 5,183,719 stalled-cycles-backend:u # 0.59% backend cycles idle (76.00%) - 1,377,648,981 instructions:u # 1.57 insn per cycle - # 0.00 stalled cycles per insn (75.10%) - 0.435827075 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest.exe + 2,195,068,626 cycles # 2.840 GHz + 3,156,047,151 instructions # 1.44 insn per cycle + 0.830513630 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795799595181 -Relative difference = 1.298794346312088e-07 +Avg ME (F77/GPU) = 3.2340795799595186 +Relative difference = 1.2987943449389332e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.524994e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.593507e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.593507e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 4.336531 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.039441e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.099419e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.099419e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.250958 sec INFO: No Floating Point Exceptions have been reported - 14,730,042,192 cycles:u # 3.388 GHz (74.98%) - 9,568,439 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.98%) - 1,140,928,798 stalled-cycles-backend:u # 7.75% backend cycles idle (74.99%) - 38,744,640,497 instructions:u # 2.63 insn per cycle - # 0.03 stalled cycles per insn (74.98%) - 4.352523083 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 726) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe + 15,179,924,789 cycles # 2.888 GHz + 38,380,026,423 instructions # 2.53 insn per cycle + 5.256350609 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593964 Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.312999e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.524318e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.524318e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 2.612223 sec 
+OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.475229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.666711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.666711e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.127283 sec INFO: No Floating Point Exceptions have been reported - 8,782,783,014 cycles:u # 3.347 GHz (75.03%) - 9,188,407 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.00%) - 1,802,679,075 stalled-cycles-backend:u # 20.53% backend cycles idle (75.00%) - 24,413,813,857 instructions:u # 2.78 insn per cycle - # 0.07 stalled cycles per insn (75.00%) - 2.628225339 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe + 9,015,042,077 cycles # 2.879 GHz + 24,584,647,400 instructions # 2.73 insn per cycle + 3.132785227 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799593964 -Relative difference = 1.2987947225564713e-07 +Avg ME (F77/C++) = 3.2340795799593955 +Relative difference = 1.2987947253027805e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.576771e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.160866e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.160866e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 1.564392 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.420774e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.902872e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.902872e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.045849 sec +INFO: No Floating Point Exceptions have been reported + 5,486,128,241 cycles # 2.677 GHz + 11,257,791,321 instructions # 2.05 insn per cycle + 2.051495208 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2379) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads 
/ `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.115637e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.706987e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.706987e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 1.823473 sec INFO: No Floating Point Exceptions have been reported - 5,138,956,908 cycles:u # 3.261 GHz (74.76%) - 8,020,795 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.77%) - 32,368,163 stalled-cycles-backend:u # 0.63% backend cycles idle (75.01%) - 11,470,668,323 instructions:u # 2.23 insn per cycle - # 0.00 stalled cycles per insn (75.13%) - 1.580105792 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2399) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe + 4,932,683,503 cycles # 2.698 GHz + 10,564,312,534 instructions # 2.14 insn per cycle + 1.829070422 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799594542 -Relative difference = 1.2987945440463624e-07 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.642478e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.841447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.841447e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.987876 sec +INFO: No Floating Point Exceptions 
have been reported + 5,393,291,420 cycles # 1.802 GHz + 7,798,978,125 instructions # 1.45 insn per cycle + 2.993405603 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1545) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 9a93ce363a..2da52e42cf 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:41:58 +DATE: 2024-05-15_09:34:12 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.658767e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.925934e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.978977e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 -TOTAL : 0.393694 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.746278e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169345e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279692e+08 ) sec^-1 +MeanMatrixElemValue = ( 
3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.526969 sec INFO: No Floating Point Exceptions have been reported - 903,828,872 cycles:u # 2.285 GHz (73.39%) - 2,128,293 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.25%) - 5,468,106 stalled-cycles-backend:u # 0.60% backend cycles idle (75.65%) - 1,391,348,192 instructions:u # 1.54 insn per cycle - # 0.00 stalled cycles per insn (75.95%) - 0.447396930 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest.exe + 2,175,481,601 cycles # 2.841 GHz + 3,133,236,154 instructions # 1.44 insn per cycle + 0.822493478 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795799595181 -Relative difference = 1.298794346312088e-07 +Avg ME (F77/GPU) = 3.2340795799595186 +Relative difference = 1.2987943449389332e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.426481e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.486692e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.486692e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 4.500856 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.073853e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.135304e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.135304e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.163747 sec INFO: No Floating Point Exceptions have been reported - 15,390,828,690 cycles:u # 3.411 GHz (75.02%) - 8,990,756 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) - 23,490,713 stalled-cycles-backend:u # 0.15% backend cycles idle (75.01%) - 39,479,847,281 instructions:u # 2.57 insn per cycle - # 0.00 stalled cycles per insn (75.00%) - 4.516979997 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 597) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe + 15,015,477,641 cycles # 2.905 GHz + 40,101,004,073 instructions # 2.67 insn per cycle + 5.169229329 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799593969 -Relative difference = 1.2987947211833165e-07 +Avg ME (F77/C++) = 3.2340795799593964 +Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.366565e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.582371e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.582371e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 2.580975 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.645087e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.854605e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.854605e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.984803 sec INFO: No Floating Point Exceptions have been reported - 8,665,784,444 cycles:u # 3.343 GHz (75.04%) - 8,737,692 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.01%) - 1,175,684,003 stalled-cycles-backend:u # 13.57% backend cycles idle (75.00%) - 23,537,357,898 instructions:u # 2.72 insn per cycle - # 0.05 stalled cycles per insn (75.00%) - 2.596226506 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1947) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe + 8,681,133,196 cycles # 2.904 GHz + 23,670,240,335 instructions # 2.73 insn per cycle + 2.990217540 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2071) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799593964 -Relative difference = 1.2987947225564713e-07 +Avg ME (F77/C++) = 3.2340795799593955 +Relative difference = 1.2987947253027805e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.889868e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.366562e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.366562e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 1.702727 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.949358e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.327629e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.327629e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.227194 sec +INFO: No Floating Point Exceptions have been reported + 6,083,454,450 cycles # 2.726 GHz + 13,060,354,927 instructions # 2.15 insn per cycle + 2.232659491 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2545) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads 
/ `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.235300e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.655532e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.655532e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.110790 sec INFO: No Floating Point Exceptions have been reported - 5,646,738,380 cycles:u # 3.294 GHz (74.83%) - 9,540,611 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.82%) - 298,360,320 stalled-cycles-backend:u # 5.28% backend cycles idle (74.80%) - 13,134,319,700 instructions:u # 2.33 insn per cycle - # 0.02 stalled cycles per insn (75.02%) - 1.718575828 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2559) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe + 5,768,971,479 cycles # 2.727 GHz + 12,319,476,206 instructions # 2.14 insn per cycle + 2.116330183 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2092) (512y: 294) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799594542 -Relative difference = 1.2987945440463624e-07 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.372736e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.543941e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.543941e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.218112 sec +INFO: No Floating Point Exceptions 
have been reported + 5,823,139,493 cycles # 1.807 GHz + 9,601,917,078 instructions # 1.65 insn per cycle + 3.223679661 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1970) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index ccb760b2ba..1af837234a 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:42:12 +DATE: 2024-05-15_09:34:36 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.668767e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.923134e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.083730e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.286435e+00 +- 3.209475e-03 ) GeV^0 -TOTAL : 0.319988 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.812221e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.671322e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.977032e+08 ) sec^-1 +MeanMatrixElemValue = ( 
3.294909e+00 +- 3.228140e-03 ) GeV^0 +TOTAL : 0.485928 sec INFO: No Floating Point Exceptions have been reported - 801,327,175 cycles:u # 2.329 GHz (73.65%) - 2,161,896 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.62%) - 4,181,501 stalled-cycles-backend:u # 0.52% backend cycles idle (73.95%) - 1,363,211,462 instructions:u # 1.70 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 0.374378383 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest.exe + 2,016,146,435 cycles # 2.835 GHz + 2,919,084,596 instructions # 1.45 insn per cycle + 0.768276318 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 3.234089e+00 -Avg ME (F77/GPU) = 3.2340912986546755 -Relative difference = 7.107580142328097e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 3.234085e+00 +Avg ME (F77/GPU) = 3.2341253389604390 +Relative difference = 1.2473067479392238e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.996525e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.087124e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.087124e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 -TOTAL : 3.647819 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.194407e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.265388e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.265388e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 +TOTAL : 4.865848 sec INFO: No Floating Point Exceptions have been reported - 12,622,398,192 cycles:u # 3.452 GHz (74.87%) - 6,907,551 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.94%) - 764,688,379 stalled-cycles-backend:u # 6.06% backend cycles idle (75.05%) - 36,996,500,373 instructions:u # 2.93 insn per cycle - # 0.02 stalled cycles per insn (75.06%) - 3.661293566 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 607) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe + 14,143,024,804 cycles # 2.904 GHz + 38,346,436,959 instructions # 2.71 insn per cycle + 4.871040899 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 596) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 -Avg ME (F77/C++) = 3.2340939850546420 -Relative difference = 4.621188450363643e-09 +Avg ME (F77/C++) = 3.2340941932052374 +Relative difference = 5.974014286114415e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.339494e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.774882e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.774882e+05 ) sec^-1 
-MeanMatrixElemValue = ( 3.287818e+00 +- 3.220111e-03 ) GeV^0 -TOTAL : 1.800475 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.902133e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.301943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.301943e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 +TOTAL : 2.227789 sec INFO: No Floating Point Exceptions have been reported - 6,126,928,844 cycles:u # 3.387 GHz (74.79%) - 7,098,604 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.81%) - 2,006,509,813 stalled-cycles-backend:u # 32.75% backend cycles idle (74.93%) - 15,157,055,875 instructions:u # 2.47 insn per cycle - # 0.13 stalled cycles per insn (75.13%) - 1.812595313 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2462) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe + 6,482,988,779 cycles # 2.904 GHz + 15,822,130,701 instructions # 2.44 insn per cycle + 2.233037883 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2693) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 3.234094e+00 -Avg ME (F77/C++) = 3.2340941177681088 -Relative difference = 3.641455970126884e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234093e+00 +Avg ME (F77/C++) = 3.2340934062376618 +Relative difference = 1.2561100182708985e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.220743e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.377837e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.377837e+06 ) sec^-1 -MeanMatrixElemValue = ( 3.287796e+00 +- 3.219543e-03 ) GeV^0 -TOTAL : 1.004275 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.940581e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.026038e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.026038e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.260538 sec INFO: No Floating Point Exceptions have been reported - 3,313,286,850 cycles:u # 3.273 GHz (74.73%) - 7,508,941 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.71%) - 1,105,690,829 stalled-cycles-backend:u # 33.37% backend cycles idle (74.74%) - 7,720,635,250 instructions:u # 2.33 insn per cycle - # 0.14 stalled cycles per insn (74.94%) - 1.016455975 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3076) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe + 3,456,681,256 cycles # 2.732 GHz + 7,598,840,761 instructions # 2.20 insn per cycle + 1.265835692 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3054) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 3.234093e+00 -Avg ME (F77/C++) = 3.2340926420874894 -Relative difference = 1.1066858953654753e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340919882990420 +Relative difference = 3.6180040581126224e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.403615e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.088127e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.088127e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.203220 sec 
+INFO: No Floating Point Exceptions have been reported + 3,237,988,506 cycles # 2.681 GHz + 7,206,923,956 instructions # 2.23 insn per cycle + 1.208534996 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2854) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340919882990420 +Relative difference = 3.6180040581126224e-09 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.777065e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.501185e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.501185e+05 ) sec^-1 
+MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.635695 sec +INFO: No Floating Point Exceptions have been reported + 3,058,049,214 cycles # 1.865 GHz + 5,840,333,775 instructions # 1.91 insn per cycle + 1.641002915 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2375) (512y: 24) (512z: 1889) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340921289287508 +Relative difference = 3.986551736519174e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 780d26225f..71e6b0ffef 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:42:23 +DATE: 2024-05-15_09:34:55 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.921251e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.081243e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.260252e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.286435e+00 +- 3.209475e-03 ) GeV^0 -TOTAL : 0.320643 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.858638e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.727382e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.049746e+08 ) sec^-1 +MeanMatrixElemValue = ( 
3.294909e+00 +- 3.228140e-03 ) GeV^0 +TOTAL : 0.486234 sec INFO: No Floating Point Exceptions have been reported - 782,507,868 cycles:u # 2.281 GHz (73.92%) - 2,270,647 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.12%) - 4,716,638 stalled-cycles-backend:u # 0.60% backend cycles idle (74.10%) - 1,294,011,543 instructions:u # 1.65 insn per cycle - # 0.00 stalled cycles per insn (76.06%) - 0.372039073 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest.exe + 2,023,449,070 cycles # 2.846 GHz + 2,873,402,373 instructions # 1.42 insn per cycle + 0.769379888 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/GPU) = 3.234089e+00 -Avg ME (F77/GPU) = 3.2340912986546755 -Relative difference = 7.107580142328097e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/GPU) = 3.234085e+00 +Avg ME (F77/GPU) = 3.2341253389604390 +Relative difference = 1.2473067479392238e-05 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.982185e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.071485e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.071485e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 -TOTAL : 3.662693 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.166228e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.235687e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.235687e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 +TOTAL : 4.927743 sec INFO: No Floating Point Exceptions have been reported - 12,648,718,745 cycles:u # 3.446 GHz (74.89%) - 7,351,614 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) - 9,939,996 stalled-cycles-backend:u # 0.08% backend cycles idle (74.84%) - 37,497,463,017 instructions:u # 2.96 insn per cycle - # 0.00 stalled cycles per insn (74.91%) - 3.674792475 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 500) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe + 14,309,020,603 cycles # 2.901 GHz + 39,834,185,016 instructions # 2.78 insn per cycle + 4.932988384 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 -Avg ME (F77/C++) = 3.2340939850546420 -Relative difference = 4.621188450363643e-09 +Avg ME (F77/C++) = 3.2340941675938666 +Relative difference = 5.182096339328524e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.348676e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.942747e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.942747e+05 ) sec^-1 
-MeanMatrixElemValue = ( 3.287818e+00 +- 3.220111e-03 ) GeV^0 -TOTAL : 1.570234 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.724901e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.279125e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.279125e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 +TOTAL : 1.920068 sec INFO: No Floating Point Exceptions have been reported - 5,336,993,251 cycles:u # 3.383 GHz (74.71%) - 7,482,422 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.93%) - 1,350,151,950 stalled-cycles-backend:u # 25.30% backend cycles idle (75.15%) - 15,181,564,216 instructions:u # 2.84 insn per cycle - # 0.09 stalled cycles per insn (75.15%) - 1.581043852 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2329) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe + 5,587,136,910 cycles # 2.903 GHz + 15,286,018,089 instructions # 2.74 insn per cycle + 1.925405428 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 3.234094e+00 -Avg ME (F77/C++) = 3.2340941177681088 -Relative difference = 3.641455970126884e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234093e+00 +Avg ME (F77/C++) = 3.2340934062376618 +Relative difference = 1.2561100182708985e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 8.942098e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.758757e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.758757e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287796e+00 +- 3.219543e-03 ) GeV^0 -TOTAL : 1.315652 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.329531e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.968208e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.968208e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.743816 sec INFO: No Floating Point Exceptions have been reported - 4,441,993,205 cycles:u # 3.357 GHz (74.68%) - 8,141,578 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.96%) - 1,679,672,585 stalled-cycles-backend:u # 37.81% backend cycles idle (75.21%) - 9,786,472,986 instructions:u # 2.20 insn per cycle - # 0.17 stalled cycles per insn (75.21%) - 1.327031748 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3749) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe + 4,752,684,066 cycles # 2.718 GHz + 9,734,599,386 instructions # 2.05 insn per cycle + 1.749311383 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3707) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 3.234093e+00 -Avg ME (F77/C++) = 3.2340926462784410 -Relative difference = 1.0937272340475427e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340919817797840 +Relative difference = 5.633796441974414e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.472652e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.141162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.141162e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.707763 sec +INFO: 
No Floating Point Exceptions have been reported + 4,621,308,695 cycles # 2.699 GHz + 9,326,787,425 instructions # 2.02 insn per cycle + 1.713040106 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3495) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340919817797840 +Relative difference = 5.633796441974414e-09 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.532423e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.010355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.010355e+05 ) sec^-1 +MeanMatrixElemValue = ( 
3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.982779 sec +INFO: No Floating Point Exceptions have been reported + 3,653,134,219 cycles # 1.838 GHz + 7,034,588,180 instructions # 1.93 insn per cycle + 1.987995749 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2608) (512y: 12) (512z: 2220) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340921270661056 +Relative difference = 3.928957668408837e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 9650f66d4a..32f5af3903 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:42:35 +DATE: 2024-05-15_09:35:16 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.852469e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.026591e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.082432e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 -TOTAL : 0.369753 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.732363e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.167582e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277301e+08 ) sec^-1 +MeanMatrixElemValue = ( 
3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.528085 sec INFO: No Floating Point Exceptions have been reported - 910,136,031 cycles:u # 2.309 GHz (74.36%) - 2,197,500 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.30%) - 5,789,107 stalled-cycles-backend:u # 0.64% backend cycles idle (74.11%) - 1,453,168,059 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (75.55%) - 0.424603823 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest.exe + 2,179,676,411 cycles # 2.847 GHz + 3,146,231,581 instructions # 1.44 insn per cycle + 0.822131932 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795839181671 -Relative difference = 1.2865539287460837e-07 +Avg ME (F77/GPU) = 3.2340795839181666 +Relative difference = 1.2865539301192385e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.463673e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.525138e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.525138e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 4.436706 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.042066e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.101336e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.101336e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.243500 sec INFO: No Floating Point Exceptions have been reported - 15,156,001,082 cycles:u # 3.407 GHz (75.01%) - 9,664,661 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) - 868,948,815 stalled-cycles-backend:u # 5.73% backend cycles idle (75.01%) - 39,292,243,946 instructions:u # 2.59 insn per cycle - # 0.02 stalled cycles per insn (75.01%) - 4.453135404 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 747) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe + 15,259,290,721 cycles # 2.908 GHz + 38,581,382,277 instructions # 2.53 insn per cycle + 5.248883068 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 677) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.506627e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.734964e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.734964e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 2.507951 sec 
+OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.483834e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.674993e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.674993e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.119385 sec INFO: No Floating Point Exceptions have been reported - 8,429,880,809 cycles:u # 3.347 GHz (74.92%) - 8,940,501 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.91%) - 1,301,822,604 stalled-cycles-backend:u # 15.44% backend cycles idle (74.92%) - 24,149,695,606 instructions:u # 2.86 insn per cycle - # 0.05 stalled cycles per insn (74.92%) - 2.523103375 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2102) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe + 8,939,775,457 cycles # 2.862 GHz + 24,230,284,808 instructions # 2.71 insn per cycle + 3.124806580 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.750025e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.357405e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.357405e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 1.533704 sec 
+OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.594731e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.084904e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.084904e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 1.983472 sec +INFO: No Floating Point Exceptions have been reported + 5,389,142,139 cycles # 2.710 GHz + 11,283,335,312 instructions # 2.09 insn per cycle + 1.988987810 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2483) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.299283e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 6.923370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.923370e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 1.772999 sec INFO: No Floating Point Exceptions have been reported - 5,071,742,396 cycles:u # 3.283 GHz (74.70%) - 12,094,843 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.84%) - 1,405,067,296 stalled-cycles-backend:u # 27.70% backend cycles idle (75.10%) - 11,364,876,035 instructions:u # 2.24 insn per cycle - # 0.12 stalled cycles per insn (75.15%) - 1.549168658 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2470) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe + 4,852,815,875 cycles # 2.731 GHz + 10,532,942,750 instructions # 2.17 insn per cycle + 1.778490907 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2170) (512y: 148) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340796844996675 -Relative difference = 9.755489429022839e-08 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.819775e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.040098e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.040098e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.854977 sec +INFO: No Floating Point Exceptions have 
been reported + 5,210,701,654 cycles # 1.823 GHz + 7,608,646,328 instructions # 1.46 insn per cycle + 2.860486302 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1611) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index 5f5fc8dfa6..0fb04f2139 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_16:42:48 +DATE: 2024-05-15_09:35:39 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.856097e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.930499e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.983942e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 -TOTAL : 0.370270 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.742163e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168613e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.278429e+08 ) sec^-1 +MeanMatrixElemValue = ( 
3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.529609 sec INFO: No Floating Point Exceptions have been reported - 893,302,871 cycles:u # 2.248 GHz (74.53%) - 2,064,846 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.73%) - 5,563,392 stalled-cycles-backend:u # 0.62% backend cycles idle (75.97%) - 1,398,072,822 instructions:u # 1.57 insn per cycle - # 0.00 stalled cycles per insn (76.03%) - 0.422543554 seconds time elapsed -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest.exe + 2,201,621,718 cycles # 2.841 GHz + 3,141,769,862 instructions # 1.43 insn per cycle + 0.833943270 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795839181671 -Relative difference = 1.2865539287460837e-07 +Avg ME (F77/GPU) = 3.2340795839181666 +Relative difference = 1.2865539301192385e-07 OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.427184e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.486616e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.486616e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 4.501563 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.029495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.088187e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.088187e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.273888 sec INFO: No Floating Point Exceptions have been reported - 15,471,361,613 cycles:u # 3.428 GHz (75.01%) - 9,587,219 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) - 53,435,497 stalled-cycles-backend:u # 0.35% backend cycles idle (75.01%) - 40,101,852,759 instructions:u # 2.59 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 4.518616180 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 631) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe + 15,342,852,271 cycles # 2.907 GHz + 40,369,748,050 instructions # 2.63 insn per cycle + 5.279165532 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.491073e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.718650e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.718650e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 2.516438 sec 
+OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.708852e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.925232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.925232e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.936198 sec INFO: No Floating Point Exceptions have been reported - 8,513,144,488 cycles:u # 3.366 GHz (75.02%) - 9,669,053 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.01%) - 1,081,416,989 stalled-cycles-backend:u # 12.70% backend cycles idle (75.01%) - 23,544,538,216 instructions:u # 2.77 insn per cycle - # 0.05 stalled cycles per insn (75.01%) - 2.532731041 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1992) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe + 8,518,728,881 cycles # 2.897 GHz + 23,252,737,328 instructions # 2.73 insn per cycle + 2.941680463 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2090) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.853773e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.323472e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.323472e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 1.712400 sec 
+OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.799915e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.157429e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.157429e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.293101 sec +INFO: No Floating Point Exceptions have been reported + 6,243,757,154 cycles # 2.717 GHz + 12,962,082,336 instructions # 2.08 insn per cycle + 2.298726731 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2668) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.081644e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.484011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.484011e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.171117 sec INFO: No Floating Point Exceptions have been reported - 5,695,677,441 cycles:u # 3.302 GHz (74.96%) - 11,580,613 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.95%) - 725,805,658 stalled-cycles-backend:u # 12.74% backend cycles idle (74.95%) - 13,041,698,391 instructions:u # 2.29 insn per cycle - # 0.06 stalled cycles per insn (74.97%) - 1.728510178 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2710) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe + 5,931,060,022 cycles # 2.726 GHz + 12,239,685,205 instructions # 2.06 insn per cycle + 2.176662808 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2208) (512y: 296) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340796844996675 -Relative difference = 9.755489429022839e-08 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.503773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.687107e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.687107e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.101704 sec +INFO: No Floating Point Exceptions have 
been reported + 5,594,172,202 cycles # 1.801 GHz + 8,744,859,098 instructions # 1.56 insn per cycle + 3.107224592 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1908) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe is not supported (no avx512vl in /proc/cpuinfo) +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED From 7f1ee3bd8c21acbed01ef40011704515005064af Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 16 May 2024 08:22:31 +0200 Subject: [PATCH 35/48] [jtmk2] rerun 30 tmad tests on itscrd90 - one new FPE issue #845 in log_gqttq_mad_f_inl0_hrd0.txt, the rest as expected STARTED AT Thu May 16 01:24:16 AM CEST 2024 (SM tests) ENDED(1) AT Thu May 16 05:58:45 AM CEST 2024 [Status=0] (BSM tests) ENDED(1) AT Thu May 16 06:07:42 AM CEST 2024 [Status=0] 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 24 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 18 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt 1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt 24 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt The new issue #845 is the following +Program received signal SIGFPE: Floating-point exception - erroneous arithmetic operation. + +Backtrace for this error: +#0 0x7f2a1a623860 in ??? +#1 0x7f2a1a622a05 in ??? +#2 0x7f2a1a254def in ??? +#3 0x7f2a1ae20acc in ??? +#4 0x7f2a1acc4575 in ??? +#5 0x7f2a1ae1d4c9 in ??? +#6 0x7f2a1ae2570d in ??? +#7 0x7f2a1ae2afa1 in ??? +#8 0x43008b in ??? +#9 0x431c10 in ??? +#10 0x432d47 in ??? +#11 0x433b1e in ??? +#12 0x44a921 in ??? +#13 0x42ebbf in ??? +#14 0x40371e in ??? +#15 0x7f2a1a23feaf in ??? +#16 0x7f2a1a23ff5f in ??? +#17 0x403844 in ??? +#18 0xffffffffffffffff in ??? +./madX.sh: line 379: 3004240 Floating point exception(core dumped) $timecmd $cmd < ${tmpin} > ${tmp} +ERROR! 
' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' failed --- .../log_eemumu_mad_d_inl0_hrd0.txt | 132 ++++----- .../log_eemumu_mad_f_inl0_hrd0.txt | 132 ++++----- .../log_eemumu_mad_m_inl0_hrd0.txt | 138 ++++----- .../log_ggtt_mad_d_inl0_hrd0.txt | 130 ++++----- .../log_ggtt_mad_f_inl0_hrd0.txt | 130 ++++----- .../log_ggtt_mad_m_inl0_hrd0.txt | 132 ++++----- .../log_ggttg_mad_d_inl0_hrd0.txt | 134 ++++----- .../log_ggttg_mad_f_inl0_hrd0.txt | 132 ++++----- .../log_ggttg_mad_m_inl0_hrd0.txt | 130 ++++----- .../log_ggttgg_mad_d_inl0_hrd0.txt | 134 ++++----- .../log_ggttgg_mad_f_inl0_hrd0.txt | 136 ++++----- .../log_ggttgg_mad_m_inl0_hrd0.txt | 130 ++++----- .../log_ggttggg_mad_d_inl0_hrd0.txt | 134 ++++----- .../log_ggttggg_mad_f_inl0_hrd0.txt | 136 ++++----- .../log_ggttggg_mad_m_inl0_hrd0.txt | 132 ++++----- .../log_gqttq_mad_d_inl0_hrd0.txt | 136 ++++----- .../log_gqttq_mad_f_inl0_hrd0.txt | 265 ++++++------------ .../log_gqttq_mad_m_inl0_hrd0.txt | 134 ++++----- .../log_heftggbb_mad_d_inl0_hrd0.txt | 132 ++++----- .../log_heftggbb_mad_f_inl0_hrd0.txt | 34 +-- .../log_heftggbb_mad_m_inl0_hrd0.txt | 128 ++++----- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 134 ++++----- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 130 ++++----- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 130 ++++----- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 26 +- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 28 +- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 26 +- .../log_susyggtt_mad_d_inl0_hrd0.txt | 30 +- .../log_susyggtt_mad_f_inl0_hrd0.txt | 30 +- .../log_susyggtt_mad_m_inl0_hrd0.txt | 30 +- 30 files changed, 1640 insertions(+), 1745 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 80212fc9f8..41d66d8253 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,8 +13,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:29:23 +DATE: 2024-05-16_01:25:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7490s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7405s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7231s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7147s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1860s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1773s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1771s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.54E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - 
[COUNTERS] PROGRAM TOTAL : 0.3906s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2991s - [COUNTERS] Fortran MEs ( 1 ) : 0.0915s for 90112 events => throughput is 9.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3949s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3024s + [COUNTERS] Fortran MEs ( 1 ) : 0.0925s for 90112 events => throughput is 9.74E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,8 +133,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1910s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1841s + [COUNTERS] PROGRAM TOTAL : 0.1913s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1844s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 8192 events => throughput is 1.18E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3806s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3053s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0753s for 90112 events => throughput is 1.20E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3835s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0754s for 90112 events => throughput is 1.20E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.198292e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.192081e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.189902e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204613e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1826s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.86E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1864s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1822s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.95E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3505s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3034s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0470s for 90112 events => throughput is 1.92E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3602s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3125s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0476s for 90112 events => throughput is 1.89E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.910605e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953763e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.993478e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.009226e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1839s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1802s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3398s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3038s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0360s for 90112 events => throughput is 2.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3431s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.3065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0366s for 90112 events => throughput is 2.46E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.544384e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.541984e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.669137e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.616899e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1836s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1804s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1841s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1810s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.69E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3423s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3080s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0343s for 
90112 events => throughput is 2.62E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3388s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3042s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0346s for 90112 events => throughput is 2.61E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.714748e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.662866e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.793958e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.883371e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1825s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 2.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1859s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1818s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.99E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 0.3507s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3069s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0439s for 90112 events => throughput is 2.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3474s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3044s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0430s for 90112 events => throughput is 2.10E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.062300e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.029340e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.177526e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.231218e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6140s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6135s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 
[9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7354s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7304s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.81E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7363s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7314s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.86E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.128413e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.277665e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.930164e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.916168e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.001091e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.959957e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.445549e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.493136e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.976571e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.970202e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.020573e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.040191e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.977453e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.002261e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.141008e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.140061e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 2651e2b252..c4c8099bbf 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -14,15 +14,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:29:39 +DATE: 2024-05-16_01:26:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7223s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7137s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7287s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7200s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.42E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1759s - [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.57E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - 
[COUNTERS] PROGRAM TOTAL : 0.3930s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3006s - [COUNTERS] Fortran MEs ( 1 ) : 0.0924s for 90112 events => throughput is 9.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3902s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2987s + [COUNTERS] Fortran MEs ( 1 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1878s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1903s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1836s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.22E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3078s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0749s for 90112 events => throughput is 1.20E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3825s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3093s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0732s for 90112 events => throughput is 1.23E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.250685e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.260929e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.267147e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.250210e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1816s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.97E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1813s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.15E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3282s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2991s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0292s for 90112 events => throughput is 3.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3288s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0290s for 90112 events => throughput is 3.10E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.188800e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.206836e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.348517e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.334282e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1838s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1814s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1835s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1811s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.38E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3291s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3023s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 90112 events => throughput is 3.36E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3304s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.3034s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 90112 events => throughput is 3.34E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.394421e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.522447e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.668083e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.386931e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1842s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1829s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.41E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3329s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3063s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0265s for 
90112 events => throughput is 3.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3301s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3033s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 90112 events => throughput is 3.37E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.309620e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.528072e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.369965e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.720927e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1829s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.21E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 0.3340s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3060s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 90112 events => throughput is 3.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3371s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0291s for 90112 events => throughput is 3.10E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.342049e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.341186e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.540514e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.598530e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6040s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6035s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6090s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.68E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 
[9.1515592892887687E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7354s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7306s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.91E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7344s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7297s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.95E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.575616e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.546893e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.796191e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.804903e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.392977e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.477327e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.031140e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.060127e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.306294e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.389797e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.253384e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.251129e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.748696e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.752691e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.478402e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.481445e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index c72828112e..fc86f120db 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,27 +1,27 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - -make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:29:56 +DATE: 2024-05-16_01:26:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7203s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7117s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7237s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7153s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1854s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1768s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1861s + [COUNTERS] Fortran Overhead ( 0 ) 
: 0.1773s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.40E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3927s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3002s - [COUNTERS] Fortran MEs ( 1 ) : 0.0925s for 90112 events => throughput is 9.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4045s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3124s + [COUNTERS] Fortran MEs ( 1 ) : 0.0920s for 90112 events => throughput is 9.79E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1918s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1847s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0071s for 8192 events => throughput is 1.15E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1967s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1895s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3882s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3114s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0768s for 90112 events => throughput is 1.17E+06 events/s + [COUNTERS] PROGRAM TOTAL : 
0.3901s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3110s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0791s for 90112 events => throughput is 1.14E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.190112e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.191141e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.202467e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.200935e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1883s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1842s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.97E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.98E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,8 +242,8 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3510s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3054s + [COUNTERS] 
PROGRAM TOTAL : 0.3486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3030s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0456s for 90112 events => throughput is 1.98E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.027888e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.010123e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.074464e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.071657e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1854s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1821s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.47E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1848s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.49E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3387s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3028s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0359s for 90112 events => throughput is 2.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3402s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3039s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0363s for 90112 events => throughput is 2.49E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.529645e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.462700e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.689842e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.639506e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1848s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1817s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1850s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.61E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 
events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3386s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3043s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0343s for 90112 events => throughput is 2.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3431s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0351s for 90112 events => throughput is 2.56E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.665204e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.637002e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.827156e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.800572e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1860s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1821s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1865s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 8192 events => throughput is 2.21E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] 
ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3469s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3049s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0420s for 90112 events => throughput is 2.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3487s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3069s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 90112 events => throughput is 2.16E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.187638e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.197743e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.271730e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.278261e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6078s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6073s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.62E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6095s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ 
-546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7297s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.83E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7369s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7319s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.81E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.048447e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.356139e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.874128e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953546e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.969250e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.960740e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.511559e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
7.522141e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.951647e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.009432e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.090814e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.090602e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.977042e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.972046e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.160627e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.157381e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index d1f7d6c917..e1be7813b6 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:30:13 +DATE: 2024-05-16_01:26:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8150s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7712s - [COUNTERS] Fortran MEs ( 1 ) : 0.0439s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8221s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7787s + [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4153s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3722s - [COUNTERS] Fortran MEs ( 1 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4146s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s + [COUNTERS] Fortran MEs ( 1 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 
1.7541s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2769s - [COUNTERS] Fortran MEs ( 1 ) : 0.4771s for 90112 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7473s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2714s + [COUNTERS] Fortran MEs ( 1 ) : 0.4758s for 90112 events => throughput is 1.89E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4517s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4124s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0393s for 8192 events => throughput is 2.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0396s for 8192 events => throughput is 2.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.8277s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3920s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4358s for 90112 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8025s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3670s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4355s for 90112 events => throughput is 2.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.103817e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.132783e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.114937e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.139840e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4324s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4093s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4032s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.36E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5932s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2520s for 90112 events => throughput is 3.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6069s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3541s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2528s for 90112 events => throughput is 3.56E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.529919e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.613591e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.626084e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.632391e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4008s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3871s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0137s for 8192 events => throughput is 5.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3994s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3855s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.89E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4953s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3368s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1584s for 90112 events => throughput is 5.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5056s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3497s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.1559s for 90112 events => throughput is 5.78E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.599755e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.889737e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.587845e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.921310e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3964s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3835s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0129s for 8192 events => throughput is 6.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3995s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.55E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,8 +394,8 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4805s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3407s + [COUNTERS] PROGRAM TOTAL : 1.4813s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3413s [COUNTERS] CudaCpp MEs ( 2 ) : 
0.1399s for 90112 events => throughput is 6.44E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.631615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.578026e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.759324e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.673606e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4140s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3923s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0216s for 8192 events => throughput is 3.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4212s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3991s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5804s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3435s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2370s for 90112 events => throughput is 3.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 
1.5933s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3549s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 90112 events => throughput is 3.78E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.786863e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.816986e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.873596e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.814285e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8073s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8124s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8118s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.42E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7757s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7691s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0067s for 90112 events => throughput is 1.35E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7642s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7574s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.34E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.922671e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.120396e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.622319e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.622859e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.182413e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.177398e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.074544e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.080565e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.169853e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.172657e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 
128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.157576e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155839e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.187954e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.173872e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.069859e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.068966e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index e1be3248ae..0b367d2d96 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:30:39 +DATE: 2024-05-16_01:27:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8170s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7734s - [COUNTERS] Fortran MEs ( 1 ) : 0.0435s for 8192 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8191s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7751s + [COUNTERS] Fortran MEs ( 1 ) : 0.0439s for 8192 events => throughput is 1.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,8 +83,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4189s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3755s + [COUNTERS] PROGRAM TOTAL : 0.4113s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s [COUNTERS] Fortran MEs ( 1 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7459s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2697s - [COUNTERS] Fortran MEs ( 1 ) : 0.4762s for 90112 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7478s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2691s + [COUNTERS] Fortran MEs ( 1 ) : 0.4787s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ 
-133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094179780921394] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4442s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4078s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0365s for 8192 events => throughput is 2.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4476s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4108s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688579298537] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7649s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3605s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4044s for 90112 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7717s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3659s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4058s for 90112 events => throughput is 2.22E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.289579e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.257844e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.288922e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.292052e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094175850060040] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4010s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4035s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3878s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.23E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684763984058] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5082s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3361s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1720s for 90112 events => throughput is 5.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5166s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3428s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1737s for 90112 events => throughput is 5.19E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.210735e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.218996e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.237575e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.263655e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3886s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3803s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3915s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3833s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4256s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3332s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0924s for 90112 events => throughput is 9.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4300s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3376s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0925s for 90112 events => throughput is 9.74E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.818330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.896073e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.013113e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.940492e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3954s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3877s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 8192 events => throughput is 1.07E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3898s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4218s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3364s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0854s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM 
TOTAL : 1.4168s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3312s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0856s for 90112 events => throughput is 1.05E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.068593e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.032975e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.099468e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.050779e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094178213275804] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3957s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3845s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3948s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3837s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 8192 events => throughput is 7.41E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688407939567] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4559s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3326s - [COUNTERS] CudaCpp MEs ( 2 ) 
: 0.1233s for 90112 events => throughput is 7.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4646s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3397s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1249s for 90112 events => throughput is 7.22E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.366839e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.304914e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.492192e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.408593e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184344050284] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8062s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8057s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.49E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8097s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8091s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.50E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105694586476879] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 
1.7618s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7560s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 90112 events => throughput is 1.56E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7654s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7594s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 90112 events => throughput is 1.51E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.250794e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397168e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.270403e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.912682e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.044831e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.099083e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.775580e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.785250e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.128987e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 6.065057e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.894720e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.885291e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.630183e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.649544e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.416620e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.436840e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 547b1e3490..197f6200da 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:31:05 +DATE: 2024-05-16_01:27:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8179s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7740s - [COUNTERS] Fortran MEs ( 1 ) : 0.0438s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8243s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7810s + [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4094s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s - [COUNTERS] Fortran MEs ( 1 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4109s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3676s + [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 
1.7458s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2700s - [COUNTERS] Fortran MEs ( 1 ) : 0.4758s for 90112 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7536s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2749s + [COUNTERS] Fortran MEs ( 1 ) : 0.4787s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4517s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4119s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0398s for 8192 events => throughput is 2.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4520s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4121s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7993s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3599s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4394s for 90112 events => throughput is 2.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8139s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3740s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4399s for 90112 events => throughput is 2.05E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.083387e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.079375e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.080671e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.079476e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4168s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3942s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0226s for 8192 events => throughput is 3.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4154s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3930s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.67E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5919s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3452s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2467s for 90112 events => throughput is 3.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5989s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3477s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2513s for 90112 events => throughput is 3.59E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.684790e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.535235e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.710839e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.688874e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4003s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3863s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4007s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4924s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3396s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1528s for 90112 events => throughput is 5.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5000s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3471s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.1530s for 90112 events => throughput is 5.89E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.936975e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.781843e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.056847e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.986506e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3847s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4023s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3898s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.55E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4785s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3422s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1363s for 90112 events => throughput is 6.61E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 1.4677s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3316s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1361s for 90112 events => throughput is 6.62E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.654373e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.671424e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.774862e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.782154e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,8 +437,8 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4158s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3952s + [COUNTERS] PROGRAM TOTAL : 0.4150s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3944s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5859s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3571s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2288s for 90112 events => throughput is 3.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 
1.5820s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2292s for 90112 events => throughput is 3.93E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.944890e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.998616e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.998332e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.990048e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,8 +513,8 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8121s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8115s + [COUNTERS] PROGRAM TOTAL : 0.8154s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8148s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7651s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7584s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.35E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7703s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.7635s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 90112 events => throughput is 1.32E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.005307e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143723e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.597005e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.636090e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.161473e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.182886e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.067591e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.066867e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.173443e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.180722e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.146779e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.146460e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.155242e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.158610e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.034637e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.030823e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 20b4910bf1..7f0ff41464 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:31:32 +DATE: 2024-05-16_01:28:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.7043s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3683s - [COUNTERS] Fortran MEs ( 1 ) : 0.3360s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7020s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3669s + [COUNTERS] Fortran MEs ( 1 ) : 0.3350s for 8192 events => throughput is 2.45E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6655s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3301s - [COUNTERS] Fortran MEs ( 1 ) : 
0.3353s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6627s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3273s + [COUNTERS] Fortran MEs ( 1 ) : 0.3354s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.2767s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5820s - [COUNTERS] Fortran MEs ( 1 ) : 3.6947s for 90112 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2831s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5823s + [COUNTERS] Fortran MEs ( 1 ) : 3.7008s for 90112 events => throughput is 2.43E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 1.0164s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6720s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3444s for 8192 events => throughput is 2.38E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6679s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3448s for 8192 events => throughput is 2.38E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.6982s - [COUNTERS] Fortran Overhead ( 0 ) : 
1.9128s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7854s for 90112 events => throughput is 2.38E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.7090s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9162s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7928s for 90112 events => throughput is 2.38E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.444585e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.444412e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.447688e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.433186e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6808s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5022s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1786s for 8192 events => throughput is 4.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1796s for 8192 events => throughput is 4.56E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events 
(found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.7507s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7497s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.0010s for 90112 events => throughput is 4.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.7766s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7859s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9906s for 90112 events => throughput is 4.53E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.665141e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.630294e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.657093e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.629345e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5068s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4164s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0904s for 8192 events => throughput is 9.06E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5178s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4262s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 8192 events => throughput is 8.94E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] 
Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.6510s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6594s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9915s for 90112 events => throughput is 9.09E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7021s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6979s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0042s for 90112 events => throughput is 8.97E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.408887e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.244917e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.495539e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.251295e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4848s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4059s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0789s for 8192 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0824s for 8192 events => throughput is 9.95E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' 
./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5315s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6496s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8819s for 90112 events => throughput is 1.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5931s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6918s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9014s for 90112 events => throughput is 1.00E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.069797e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033892e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.059648e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.035720e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5571s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1150s for 8192 events => throughput is 7.13E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5700s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4516s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1184s for 8192 events => throughput is 6.92E+04 events/s *** 
(2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.9579s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6912s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2666s for 90112 events => throughput is 7.11E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.0137s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7172s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2965s for 90112 events => throughput is 6.95E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.192271e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.144179e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.202521e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.156532e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,8 +513,8 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7677s + [COUNTERS] PROGRAM TOTAL : 0.7726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7672s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s *** 
(3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0321s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0091s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 90112 events => throughput is 3.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 2.0397s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0164s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.87E+06 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.630446e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.642318e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.998102e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.930638e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.885438e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.882259e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 
1.245221e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.244433e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.903636e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.893041e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.255324e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.255841e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.895996e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.907568e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.770834e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.774192e+06 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 2d40c641c2..1a8c36aa43 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg - make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:32:16 +DATE: 2024-05-16_01:28:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.7000s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3644s - [COUNTERS] Fortran MEs ( 1 ) : 0.3356s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7057s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3678s + [COUNTERS] Fortran MEs ( 1 ) : 0.3379s for 8192 events => throughput is 2.42E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6615s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3271s - [COUNTERS] Fortran MEs ( 1 ) : 0.3345s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6650s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3295s + [COUNTERS] Fortran MEs ( 1 ) : 0.3355s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM 
TOTAL : 5.2699s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5798s - [COUNTERS] Fortran MEs ( 1 ) : 3.6901s for 90112 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3113s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5981s + [COUNTERS] Fortran MEs ( 1 ) : 3.7132s for 90112 events => throughput is 2.43E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112722621426752] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6530s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3330s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9881s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6537s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3344s for 8192 events => throughput is 2.45E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238468310179624E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.5670s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8948s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6722s for 90112 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.5951s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9082s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6869s for 90112 events => throughput is 2.44E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.527255e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.531268e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.545228e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.536337e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112720710186394] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5309s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4314s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0996s for 8192 events => throughput is 8.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5295s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4297s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0998s for 8192 events => throughput is 8.21E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238454786658835E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.7616s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6665s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0951s for 90112 events => throughput is 8.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7829s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6780s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1050s for 90112 events => throughput is 8.16E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.414768e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.418674e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.429788e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.378970e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4195s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3735s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0461s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4226s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3763s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 8192 events => throughput is 1.77E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.1275s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6152s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5123s for 90112 events => throughput is 1.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1354s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6236s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.5118s for 90112 events => throughput is 1.76E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.835666e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.814902e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.794023e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.818224e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4095s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3681s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0664s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6096s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4568s for 90112 events => throughput is 1.97E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 2.0774s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6180s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4593s for 90112 events => throughput is 1.96E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.989836e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.016267e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.035089e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.024583e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112723387847480] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4388s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3836s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0552s for 8192 events => throughput is 1.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4393s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3831s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0562s for 8192 events => throughput is 1.46E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,8 +470,8 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238464410949921E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.2419s - [COUNTERS] Fortran Overhead ( 0 ) : 
1.6268s + [COUNTERS] PROGRAM TOTAL : 2.2433s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6283s [COUNTERS] CudaCpp MEs ( 2 ) : 0.6150s for 90112 events => throughput is 1.47E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.504406e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.471448e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.497982e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.496104e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112726034625694] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7631s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7623s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7661s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7652s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.57E+06 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238473828077680E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0121s - [COUNTERS] Fortran Overhead ( 0 ) : 
2.0022s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 90112 events => throughput is 9.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 2.0217s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0116s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 90112 events => throughput is 8.94E+06 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.300251e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279804e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.847750e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.849139e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.699331e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.708780e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.419025e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.376255e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.716824e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.741880e+07 ) 
sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.489710e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.526731e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.582209e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.576787e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.625187e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.628936e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 799e6a6c40..06cc385635 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,8 +1,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 @@ -13,8 +13,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:32:55 +DATE: 2024-05-16_01:29:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.7032s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3673s - [COUNTERS] Fortran MEs ( 1 ) : 0.3358s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7042s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3668s + [COUNTERS] Fortran MEs ( 1 ) : 0.3374s for 8192 events => throughput is 2.43E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6642s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3295s - [COUNTERS] Fortran MEs ( 1 ) : 0.3347s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6678s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3296s + [COUNTERS] Fortran MEs ( 1 ) : 0.3383s for 
8192 events => throughput is 2.42E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.2826s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5869s - [COUNTERS] Fortran MEs ( 1 ) : 3.6957s for 90112 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3035s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5927s + [COUNTERS] Fortran MEs ( 1 ) : 3.7109s for 90112 events => throughput is 2.43E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 1.0162s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6692s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3470s for 8192 events => throughput is 2.36E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3509s for 8192 events => throughput is 2.33E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.7567s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9108s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8459s for 90112 events => throughput is 2.34E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.7816s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9205s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 3.8611s for 90112 events => throughput is 2.33E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.412345e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.398467e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.420469e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.396772e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6728s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4982s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1746s for 8192 events => throughput is 4.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6812s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5038s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1774s for 8192 events => throughput is 4.62E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.6634s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7403s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9231s for 90112 events => throughput is 4.69E+04 
events/s + [COUNTERS] PROGRAM TOTAL : 3.6988s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7524s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9464s for 90112 events => throughput is 4.63E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.781863e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.777911e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.781998e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.770421e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5053s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4166s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0887s for 8192 events => throughput is 9.24E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5041s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4149s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0892s for 8192 events => throughput is 9.19E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.6573s - [COUNTERS] Fortran Overhead ( 0 ) : 
1.6705s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9868s for 90112 events => throughput is 9.13E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6526s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6680s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9846s for 90112 events => throughput is 9.15E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.325042e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.374488e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.300112e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.304457e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4897s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4093s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0805s for 8192 events => throughput is 1.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4867s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0788s for 8192 events => throughput is 1.04E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events 
(found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5215s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6536s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8679s for 90112 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5251s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6551s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8699s for 90112 events => throughput is 1.04E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.077283e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.072957e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.075927e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.074127e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5656s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4472s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1184s for 8192 events => throughput is 6.92E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1197s for 8192 events => throughput is 6.85E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] 
Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.0008s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6905s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3103s for 90112 events => throughput is 6.88E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.0147s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7041s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3105s for 90112 events => throughput is 6.88E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.041319e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.810756e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.003038e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.935663e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,8 +513,8 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7759s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7704s + [COUNTERS] PROGRAM TOTAL : 0.7757s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7703s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] 
Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0297s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0066s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 90112 events => throughput is 3.90E+06 events/s + [COUNTERS] PROGRAM TOTAL : 2.0407s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0176s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 90112 events => throughput is 3.91E+06 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.633846e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.631069e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.090737e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.120692e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.829640e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.856212e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.232989e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234939e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.835721e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.866138e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.245871e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.243613e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.822524e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.862499e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.726985e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.731505e+06 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 324ee1a9b7..744dd47e66 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,20 +1,20 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cppavx2 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -22,8 +22,8 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:33:38 +DATE: 2024-05-16_01:30:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.7297s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3579s - [COUNTERS] Fortran MEs ( 1 ) : 4.3718s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8074s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s + [COUNTERS] Fortran MEs ( 1 ) : 4.4427s for 8192 events => throughput is 1.84E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.7277s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3553s - [COUNTERS] Fortran MEs ( 1 ) : 4.3724s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3603s + 
[COUNTERS] Fortran MEs ( 1 ) : 4.4473s for 8192 events => throughput is 1.84E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 50.2700s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1537s - [COUNTERS] Fortran MEs ( 1 ) : 48.1163s for 90112 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 50.3676s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1555s + [COUNTERS] Fortran MEs ( 1 ) : 48.2121s for 90112 events => throughput is 1.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102372E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 9.2003s - [COUNTERS] Fortran Overhead ( 0 ) : 4.7147s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4855s for 8192 events => throughput is 1.83E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.2196s + [COUNTERS] Fortran Overhead ( 0 ) : 4.7253s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4943s for 8192 events => throughput is 1.82E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451704E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 55.9449s - [COUNTERS] Fortran Overhead ( 0 ) : 6.4916s - [COUNTERS] CudaCpp MEs ( 2 ) : 49.4532s for 90112 events => throughput is 1.82E+03 events/s + [COUNTERS] PROGRAM TOTAL : 
56.2683s + [COUNTERS] Fortran Overhead ( 0 ) : 6.5201s + [COUNTERS] CudaCpp MEs ( 2 ) : 49.7482s for 90112 events => throughput is 1.81E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.878281e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.868635e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.879820e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.874481e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 5.0313s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6432s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3881s for 8192 events => throughput is 3.43E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0549s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6552s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3997s for 8192 events => throughput is 3.41E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451701E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 30.7303s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4175s - [COUNTERS] 
CudaCpp MEs ( 2 ) : 26.3128s for 90112 events => throughput is 3.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.7729s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4162s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3568s for 90112 events => throughput is 3.42E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.595271e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.598914e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.616783e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.606768e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.4110s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3696s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0415s for 8192 events => throughput is 7.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4142s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3728s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0413s for 8192 events => throughput is 7.87E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 
events) - [COUNTERS] PROGRAM TOTAL : 14.5874s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1244s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.4631s for 90112 events => throughput is 7.86E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.6025s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1268s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.4757s for 90112 events => throughput is 7.85E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.080696e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.082204e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.003260e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.087591e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.1884s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2624s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9260s for 8192 events => throughput is 8.85E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1594s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9156s for 8192 events => throughput is 8.95E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] 
Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 13.3165s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0431s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.2735s for 90112 events => throughput is 8.77E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.1097s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0095s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.1002s for 90112 events => throughput is 8.92E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.044942e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.157056e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.036114e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.190937e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7894s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5694s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2200s for 8192 events => throughput is 6.71E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7205s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5345s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1860s for 8192 events => throughput is 6.91E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' 
./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 16.8074s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3631s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.4443s for 90112 events => throughput is 6.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.4021s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3045s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.0976s for 90112 events => throughput is 6.88E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.788020e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.943395e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.794108e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.968493e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,8 +513,8 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9054s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8724s + [COUNTERS] PROGRAM TOTAL : 0.9037s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8707s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 
@@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 3.0251s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6614s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3637s for 90112 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.9957s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6318s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3638s for 90112 events => throughput is 2.48E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.279557e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.275863e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.504315e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.513394e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.123492e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126700e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.160692e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.163753e+05 ) sec^-1 
*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.113945e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.128674e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.155779e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.183392e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.135740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.130320e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.451591e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.455396e+05 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index be512c6871..97726609cd 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:38:06 +DATE: 2024-05-16_01:34:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.8156s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3706s - [COUNTERS] Fortran MEs ( 1 ) : 4.4449s for 8192 events => throughput is 1.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7506s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3591s + [COUNTERS] Fortran MEs ( 1 ) : 4.3916s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.8102s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3614s - [COUNTERS] Fortran MEs ( 1 ) : 4.4489s for 8192 events => throughput is 1.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3562s + [COUNTERS] Fortran MEs ( 1 ) : 4.3765s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - 
[COUNTERS] PROGRAM TOTAL : 51.1256s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1878s - [COUNTERS] Fortran MEs ( 1 ) : 48.9378s for 90112 events => throughput is 1.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 50.4568s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1771s + [COUNTERS] Fortran MEs ( 1 ) : 48.2797s for 90112 events => throughput is 1.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703729438336302E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 9.0331s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6319s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4012s for 8192 events => throughput is 1.86E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.9135s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5714s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3421s for 8192 events => throughput is 1.89E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793486626492658E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 55.2131s - [COUNTERS] Fortran Overhead ( 0 ) : 6.4300s - [COUNTERS] CudaCpp MEs ( 2 ) : 48.7831s for 90112 events => throughput is 1.85E+03 events/s + [COUNTERS] PROGRAM TOTAL : 54.1190s + [COUNTERS] Fortran Overhead ( 0 ) : 6.3214s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.7976s for 90112 events => throughput is 1.89E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.921273e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947180e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.922301e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947353e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703722581317850E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7702s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5550s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2152s for 8192 events => throughput is 6.74E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7184s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5261s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1923s for 8192 events => throughput is 6.87E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793483759856148E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 16.7184s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3394s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.3791s for 90112 events => throughput is 6.74E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.5068s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3022s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.2046s for 90112 events => throughput is 6.82E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.894177e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.978396e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.934836e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.983793e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.4158s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8799s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5359s for 8192 events => throughput is 1.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5334s for 8192 events => throughput is 1.54E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 8.5879s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6705s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.9174s for 90112 events => throughput is 1.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.5565s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.6677s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8888s for 90112 events => throughput is 1.53E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.575380e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.577584e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.571062e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.577489e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.2970s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8246s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4724s for 8192 events => throughput is 1.73E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2916s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8193s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4723s for 8192 events => throughput is 1.73E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 7.8247s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6015s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.2232s for 
90112 events => throughput is 1.73E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.7633s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5957s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.1676s for 90112 events => throughput is 1.74E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.707243e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.818661e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.779309e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.824534e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703728658657426E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.5574s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9553s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6021s for 8192 events => throughput is 1.36E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.5274s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9418s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5856s for 8192 events => throughput is 1.40E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793486977281547E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL 
: 9.3785s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7424s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.6361s for 90112 events => throughput is 1.36E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.1749s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6987s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.4761s for 90112 events => throughput is 1.39E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.370479e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.413533e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.379532e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.415193e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703736267486325E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.8882s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8668s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8657s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8443s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.83E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 
[1.5793489323670813E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 2.8658s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6303s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2355s for 90112 events => throughput is 3.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.8423s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2358s for 90112 events => throughput is 3.82E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.577461e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.583101e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.937327e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.931306e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.565490e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.570292e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.722014e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.724382e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.574587e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.573894e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.694383e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.720622e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.565279e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.518798e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.537576e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.538253e+05 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 29ba71ed95..9161616d22 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:41:40 +DATE: 2024-05-16_01:38:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.8133s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s - [COUNTERS] Fortran MEs ( 1 ) : 4.4488s for 8192 events => throughput is 1.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7461s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3596s + [COUNTERS] Fortran MEs ( 1 ) : 4.3864s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 
4.8064s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3598s - [COUNTERS] Fortran MEs ( 1 ) : 4.4466s for 8192 events => throughput is 1.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7384s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3570s + [COUNTERS] Fortran MEs ( 1 ) : 4.3814s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 51.1214s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1855s - [COUNTERS] Fortran MEs ( 1 ) : 48.9359s for 90112 events => throughput is 1.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 50.4226s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1630s + [COUNTERS] Fortran MEs ( 1 ) : 48.2596s for 90112 events => throughput is 1.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612659176674E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 9.4720s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8531s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.6189s for 8192 events => throughput is 1.77E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.4283s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8187s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.6095s for 8192 events => throughput is 1.78E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438704534934E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events 
(found 1881 events) - [COUNTERS] PROGRAM TOTAL : 57.5498s - [COUNTERS] Fortran Overhead ( 0 ) : 6.6421s - [COUNTERS] CudaCpp MEs ( 2 ) : 50.9077s for 90112 events => throughput is 1.77E+03 events/s + [COUNTERS] PROGRAM TOTAL : 56.9207s + [COUNTERS] Fortran Overhead ( 0 ) : 6.6165s + [COUNTERS] CudaCpp MEs ( 2 ) : 50.3042s for 90112 events => throughput is 1.79E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.826732e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.845749e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.828543e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.855084e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612692816703E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 5.1811s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7343s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4469s for 8192 events => throughput is 3.35E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0818s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6854s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3963s for 8192 events => throughput is 3.42E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 
[XSECTION] Cross section = 0.0001579 [1.5793438707226035E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 31.4513s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5258s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.9255s for 90112 events => throughput is 3.35E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.7891s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4469s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3422s for 90112 events => throughput is 3.42E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.443194e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.522464e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.460068e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.523204e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.4477s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3897s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0581s for 8192 events => throughput is 7.74E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.3962s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3623s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0339s for 8192 events => throughput is 7.92E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ 
Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 14.7436s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1712s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.5725s for 90112 events => throughput is 7.79E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.5036s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1328s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.3708s for 90112 events => throughput is 7.92E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.981637e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.125066e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.008855e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.074324e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.1895s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2554s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9341s for 8192 events => throughput is 8.77E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1502s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2343s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9159s for 8192 events => throughput is 
8.94E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 13.2415s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0496s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.1919s for 90112 events => throughput is 8.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.9975s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0012s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.9962s for 90112 events => throughput is 9.01E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.162017e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.351869e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.164661e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.208773e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.8140s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5816s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2324s for 8192 events => throughput is 6.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7414s + [COUNTERS] 
Fortran Overhead ( 0 ) : 1.5394s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2020s for 8192 events => throughput is 6.82E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 16.9564s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3714s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.5850s for 90112 events => throughput is 6.63E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.5337s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3003s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.2334s for 90112 events => throughput is 6.81E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.737200e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.881370e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.703774e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.943865e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612512203166E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9054s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8725s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0330s for 8192 events => throughput is 2.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9014s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8685s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 8192 events => throughput is 2.49E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642387717E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 3.0306s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6671s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3634s for 90112 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.9899s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6262s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3637s for 90112 events => throughput is 2.48E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.282647e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.280457e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.507791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.523385e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.123606e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.121733e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.148703e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162091e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.119873e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.125438e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.178988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168234e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 
--bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.119575e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.125929e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.452858e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.451563e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 834e2f3a3b..f87c8c9cf1 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -20,8 +20,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:47:42 +DATE: 2024-05-16_01:43:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 103.3030s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5119s - [COUNTERS] Fortran MEs ( 1 ) : 102.7911s for 8192 events => throughput is 7.97E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.7235s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5049s + [COUNTERS] Fortran MEs ( 1 ) : 101.2186s for 8192 events => throughput is 8.09E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 103.2514s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5150s - [COUNTERS] Fortran MEs ( 1 ) : 102.7364s for 8192 events => throughput is 7.97E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.7703s + [COUNTERS] Fortran Overhead ( 
0 ) : 0.5109s + [COUNTERS] Fortran MEs ( 1 ) : 101.2594s for 8192 events => throughput is 8.09E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1123.8389s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4720s - [COUNTERS] Fortran MEs ( 1 ) : 1119.3668s for 90112 events => throughput is 8.05E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1119.4272s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4297s + [COUNTERS] Fortran MEs ( 1 ) : 1114.9976s for 90112 events => throughput is 8.08E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 239.0576s - [COUNTERS] Fortran Overhead ( 0 ) : 109.8826s - [COUNTERS] CudaCpp MEs ( 2 ) : 129.1750s for 8192 events => throughput is 6.34E+01 events/s + [COUNTERS] PROGRAM TOTAL : 236.4243s + [COUNTERS] Fortran Overhead ( 0 ) : 108.7156s + [COUNTERS] CudaCpp MEs ( 2 ) : 127.7087s for 8192 events => throughput is 6.41E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1529.6606s - [COUNTERS] Fortran Overhead ( 0 ) : 113.3284s - [COUNTERS] CudaCpp MEs ( 2 ) : 1416.3323s for 90112 events => throughput is 6.36E+01 
events/s + [COUNTERS] PROGRAM TOTAL : 1507.2346s + [COUNTERS] Fortran Overhead ( 0 ) : 112.2012s + [COUNTERS] CudaCpp MEs ( 2 ) : 1395.0334s for 90112 events => throughput is 6.46E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.491799e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.611342e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.526661e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.612518e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 115.9147s - [COUNTERS] Fortran Overhead ( 0 ) : 53.4186s - [COUNTERS] CudaCpp MEs ( 2 ) : 62.4961s for 8192 events => throughput is 1.31E+02 events/s + [COUNTERS] PROGRAM TOTAL : 114.8253s + [COUNTERS] Fortran Overhead ( 0 ) : 52.9129s + [COUNTERS] CudaCpp MEs ( 2 ) : 61.9124s for 8192 events => throughput is 1.32E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 746.6810s - 
[COUNTERS] Fortran Overhead ( 0 ) : 57.3647s - [COUNTERS] CudaCpp MEs ( 2 ) : 689.3163s for 90112 events => throughput is 1.31E+02 events/s + [COUNTERS] PROGRAM TOTAL : 734.6001s + [COUNTERS] Fortran Overhead ( 0 ) : 56.8950s + [COUNTERS] CudaCpp MEs ( 2 ) : 677.7051s for 90112 events => throughput is 1.33E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.551185e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.573216e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.557439e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.570652e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 53.2704s - [COUNTERS] Fortran Overhead ( 0 ) : 24.6321s - [COUNTERS] CudaCpp MEs ( 2 ) : 28.6383s for 8192 events => throughput is 2.86E+02 events/s + [COUNTERS] PROGRAM TOTAL : 53.5594s + [COUNTERS] Fortran Overhead ( 0 ) : 24.8692s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.6902s for 8192 events => throughput is 2.86E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 
[2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 343.5580s - [COUNTERS] Fortran Overhead ( 0 ) : 28.7102s - [COUNTERS] CudaCpp MEs ( 2 ) : 314.8478s for 90112 events => throughput is 2.86E+02 events/s + [COUNTERS] PROGRAM TOTAL : 345.8816s + [COUNTERS] Fortran Overhead ( 0 ) : 28.6165s + [COUNTERS] CudaCpp MEs ( 2 ) : 317.2651s for 90112 events => throughput is 2.84E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.413864e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.346027e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.410239e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397864e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 48.2891s - [COUNTERS] Fortran Overhead ( 0 ) : 21.8234s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.4657s for 8192 events => throughput is 3.10E+02 events/s + [COUNTERS] PROGRAM TOTAL : 47.5433s + [COUNTERS] Fortran Overhead ( 0 ) : 21.7991s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.7442s for 8192 events => throughput is 3.18E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' 
./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 313.6108s - [COUNTERS] Fortran Overhead ( 0 ) : 25.6541s - [COUNTERS] CudaCpp MEs ( 2 ) : 287.9567s for 90112 events => throughput is 3.13E+02 events/s + [COUNTERS] PROGRAM TOTAL : 310.2994s + [COUNTERS] Fortran Overhead ( 0 ) : 25.7446s + [COUNTERS] CudaCpp MEs ( 2 ) : 284.5548s for 90112 events => throughput is 3.17E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.799191e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.866314e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.784281e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.859864e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 51.5482s - [COUNTERS] Fortran Overhead ( 0 ) : 25.2783s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.2699s for 8192 events => throughput is 3.12E+02 events/s + [COUNTERS] PROGRAM TOTAL : 50.4926s + [COUNTERS] Fortran Overhead ( 0 ) : 24.7479s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.7447s for 8192 events => throughput is 
3.18E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 319.4275s - [COUNTERS] Fortran Overhead ( 0 ) : 29.3179s - [COUNTERS] CudaCpp MEs ( 2 ) : 290.1096s for 90112 events => throughput is 3.11E+02 events/s + [COUNTERS] PROGRAM TOTAL : 313.6701s + [COUNTERS] Fortran Overhead ( 0 ) : 28.5625s + [COUNTERS] CudaCpp MEs ( 2 ) : 285.1075s for 90112 events => throughput is 3.16E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.304770e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.394651e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.314422e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.384790e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 4.2754s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1942s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0812s for 8192 events => throughput is 7.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.2708s + 
[COUNTERS] Fortran Overhead ( 0 ) : 3.1879s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0829s for 8192 events => throughput is 7.56E+03 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 19.1318s - [COUNTERS] Fortran Overhead ( 0 ) : 7.2127s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9192s for 90112 events => throughput is 7.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 19.1407s + [COUNTERS] Fortran Overhead ( 0 ) : 7.1896s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9511s for 90112 events => throughput is 7.54E+03 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.540502e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.518899e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.290286e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.266687e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.317349e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.285867e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.595658e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.577065e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.250349e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.302340e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.495786e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.485177e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.265281e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.239249e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.243097e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.236704e+03 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 14284eae7f..9938780c0a 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_23:21:26 +DATE: 2024-05-16_03:16:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 103.3301s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5120s - [COUNTERS] Fortran MEs ( 1 ) : 102.8181s for 8192 events => throughput is 7.97E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.6786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5071s + [COUNTERS] Fortran MEs ( 1 ) : 101.1715s for 8192 events => throughput is 8.10E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 103.1588s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5135s - [COUNTERS] Fortran MEs ( 1 ) : 102.6453s for 8192 events => throughput is 7.98E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.1420s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5108s + [COUNTERS] Fortran MEs ( 1 ) : 101.6312s for 8192 events => throughput is 8.06E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 
events) - [COUNTERS] PROGRAM TOTAL : 1125.1919s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4525s - [COUNTERS] Fortran MEs ( 1 ) : 1120.7394s for 90112 events => throughput is 8.04E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1119.6489s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4200s + [COUNTERS] Fortran MEs ( 1 ) : 1115.2289s for 90112 events => throughput is 8.08E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405719957040752E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 212.5627s - [COUNTERS] Fortran Overhead ( 0 ) : 97.5579s - [COUNTERS] CudaCpp MEs ( 2 ) : 115.0048s for 8192 events => throughput is 7.12E+01 events/s + [COUNTERS] PROGRAM TOTAL : 207.9761s + [COUNTERS] Fortran Overhead ( 0 ) : 95.5518s + [COUNTERS] CudaCpp MEs ( 2 ) : 112.4243s for 8192 events => throughput is 7.29E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -168,9 +168,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326290771198648E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1367.0989s - [COUNTERS] Fortran Overhead ( 0 ) : 101.8036s - [COUNTERS] CudaCpp MEs ( 2 ) : 1265.2953s for 90112 events => throughput is 7.12E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1342.0233s + [COUNTERS] Fortran Overhead ( 0 ) : 99.5419s + [COUNTERS] CudaCpp MEs ( 2 ) : 1242.4814s for 90112 events => throughput is 7.25E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -183,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.563596e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.627892e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.552450e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.617246e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,9 +212,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405717007921116E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 52.5114s - [COUNTERS] Fortran Overhead ( 0 ) : 24.7824s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.7291s for 8192 events => throughput is 2.95E+02 events/s + [COUNTERS] PROGRAM TOTAL : 52.4912s + [COUNTERS] Fortran Overhead ( 0 ) : 24.8093s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.6819s for 8192 events => throughput is 2.96E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -246,9 +246,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326284900828787E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 336.6135s - [COUNTERS] Fortran Overhead ( 0 ) : 28.7158s - [COUNTERS] CudaCpp MEs ( 2 ) : 307.8977s for 90112 events => throughput is 2.93E+02 events/s + [COUNTERS] PROGRAM TOTAL : 333.5578s + [COUNTERS] Fortran Overhead ( 0 ) : 28.7441s + [COUNTERS] CudaCpp MEs ( 2 ) : 304.8137s for 90112 events => throughput is 
2.96E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.337586e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.354565e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.327352e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.352919e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -290,9 +290,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 27.4221s - [COUNTERS] Fortran Overhead ( 0 ) : 12.7945s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.6276s for 8192 events => throughput is 5.60E+02 events/s + [COUNTERS] PROGRAM TOTAL : 26.9353s + [COUNTERS] Fortran Overhead ( 0 ) : 12.5805s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.3549s for 8192 events => throughput is 5.71E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -324,9 +324,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 178.0327s - [COUNTERS] Fortran Overhead ( 0 ) : 16.8149s - [COUNTERS] CudaCpp MEs ( 2 ) : 161.2178s for 90112 events => throughput is 5.59E+02 events/s + [COUNTERS] PROGRAM TOTAL : 175.5244s + 
[COUNTERS] Fortran Overhead ( 0 ) : 16.6137s + [COUNTERS] CudaCpp MEs ( 2 ) : 158.9107s for 90112 events => throughput is 5.67E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.699081e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.796719e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.673580e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.813418e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -368,9 +368,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 24.2879s - [COUNTERS] Fortran Overhead ( 0 ) : 11.2621s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.0258s for 8192 events => throughput is 6.29E+02 events/s + [COUNTERS] PROGRAM TOTAL : 24.0035s + [COUNTERS] Fortran Overhead ( 0 ) : 11.1136s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.8899s for 8192 events => throughput is 6.36E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -402,9 +402,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 159.3803s - [COUNTERS] Fortran Overhead ( 0 ) : 15.2686s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 144.1117s for 90112 events => throughput is 6.25E+02 events/s + [COUNTERS] PROGRAM TOTAL : 155.2962s + [COUNTERS] Fortran Overhead ( 0 ) : 15.0900s + [COUNTERS] CudaCpp MEs ( 2 ) : 140.2063s for 90112 events => throughput is 6.43E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.640478e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.795563e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.647363e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.781198e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -446,9 +446,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405719306052570E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 26.0176s - [COUNTERS] Fortran Overhead ( 0 ) : 12.8723s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.1453s for 8192 events => throughput is 6.23E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.3167s + [COUNTERS] Fortran Overhead ( 0 ) : 12.6125s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.7041s for 8192 events => throughput is 6.45E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -480,9 +480,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326283660088769E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 
events) - [COUNTERS] PROGRAM TOTAL : 162.3061s - [COUNTERS] Fortran Overhead ( 0 ) : 16.9325s - [COUNTERS] CudaCpp MEs ( 2 ) : 145.3735s for 90112 events => throughput is 6.20E+02 events/s + [COUNTERS] PROGRAM TOTAL : 157.3576s + [COUNTERS] Fortran Overhead ( 0 ) : 16.6484s + [COUNTERS] CudaCpp MEs ( 2 ) : 140.7092s for 90112 events => throughput is 6.40E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.627762e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.841550e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.645257e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.830427e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -523,9 +523,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405722175509512E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 2.5358s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0394s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4964s for 8192 events => throughput is 1.65E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5511s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0591s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4920s for 8192 events => throughput is 1.66E+04 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -556,9 +556,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 
[XSECTION] Cross section = 2.333e-07 [2.3326296967941821E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 11.4669s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0479s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4190s for 90112 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 11.4353s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0032s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4321s for 90112 events => throughput is 1.66E+04 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -571,42 +571,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.635824e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.630624e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.645926e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.646596e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.338441e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.329013e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.336873e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359221e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.301357e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.329144e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.323702e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.339287e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.316894e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.285838e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.434196e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.423096e+03 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 4c053bc03a..9cddd5fe7c 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_00:31:56 +DATE: 2024-05-16_04:26:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 103.3564s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5129s - [COUNTERS] Fortran MEs ( 1 ) : 102.8435s for 8192 events => throughput is 7.97E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.9892s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5028s + [COUNTERS] Fortran MEs ( 1 ) : 101.4864s for 8192 events => throughput is 8.07E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** 
-------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 103.0354s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5118s - [COUNTERS] Fortran MEs ( 1 ) : 102.5236s for 8192 events => throughput is 7.99E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.7400s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5055s + [COUNTERS] Fortran MEs ( 1 ) : 101.2345s for 8192 events => throughput is 8.09E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1126.2368s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4377s - [COUNTERS] Fortran MEs ( 1 ) : 1121.7991s for 90112 events => throughput is 8.03E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1119.6356s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4254s + [COUNTERS] Fortran MEs ( 1 ) : 1115.2102s for 90112 events => throughput is 8.08E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 225.4910s - [COUNTERS] Fortran Overhead ( 0 ) : 103.7263s - [COUNTERS] CudaCpp MEs ( 2 ) : 121.7646s for 8192 events => throughput is 6.73E+01 events/s + [COUNTERS] PROGRAM TOTAL : 229.4020s + [COUNTERS] Fortran Overhead ( 0 ) : 103.2152s + [COUNTERS] CudaCpp MEs ( 2 ) : 126.1868s for 8192 events => throughput is 
6.49E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1531.3712s - [COUNTERS] Fortran Overhead ( 0 ) : 116.0375s - [COUNTERS] CudaCpp MEs ( 2 ) : 1415.3337s for 90112 events => throughput is 6.37E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1528.1049s + [COUNTERS] Fortran Overhead ( 0 ) : 113.9982s + [COUNTERS] CudaCpp MEs ( 2 ) : 1414.1067s for 90112 events => throughput is 6.37E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.382180e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.425842e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.466526e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.948869e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 120.2262s - [COUNTERS] Fortran Overhead ( 0 ) : 54.8384s - [COUNTERS] CudaCpp MEs ( 2 ) : 65.3878s for 8192 events => throughput is 1.25E+02 events/s + [COUNTERS] PROGRAM TOTAL : 
117.4242s + [COUNTERS] Fortran Overhead ( 0 ) : 53.6967s + [COUNTERS] CudaCpp MEs ( 2 ) : 63.7275s for 8192 events => throughput is 1.29E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 782.8292s - [COUNTERS] Fortran Overhead ( 0 ) : 58.6898s - [COUNTERS] CudaCpp MEs ( 2 ) : 724.1394s for 90112 events => throughput is 1.24E+02 events/s + [COUNTERS] PROGRAM TOTAL : 756.7451s + [COUNTERS] Fortran Overhead ( 0 ) : 57.7650s + [COUNTERS] CudaCpp MEs ( 2 ) : 698.9802s for 90112 events => throughput is 1.29E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.516262e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.540886e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.516185e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.526888e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 52.1777s - [COUNTERS] Fortran Overhead ( 0 ) : 24.0873s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 28.0903s for 8192 events => throughput is 2.92E+02 events/s + [COUNTERS] PROGRAM TOTAL : 51.3666s + [COUNTERS] Fortran Overhead ( 0 ) : 23.6472s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7194s for 8192 events => throughput is 2.96E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 343.2865s - [COUNTERS] Fortran Overhead ( 0 ) : 27.8199s - [COUNTERS] CudaCpp MEs ( 2 ) : 315.4666s for 90112 events => throughput is 2.86E+02 events/s + [COUNTERS] PROGRAM TOTAL : 334.4015s + [COUNTERS] Fortran Overhead ( 0 ) : 27.7321s + [COUNTERS] CudaCpp MEs ( 2 ) : 306.6693s for 90112 events => throughput is 2.94E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.529786e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.517938e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.531384e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.525606e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 45.2330s - [COUNTERS] Fortran Overhead ( 0 ) : 20.3742s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.8588s for 8192 events => throughput is 3.30E+02 events/s + [COUNTERS] PROGRAM TOTAL : 44.9641s + [COUNTERS] Fortran Overhead ( 0 ) : 20.5328s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.4313s for 8192 events => throughput is 3.35E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 297.0222s - [COUNTERS] Fortran Overhead ( 0 ) : 24.2760s - [COUNTERS] CudaCpp MEs ( 2 ) : 272.7462s for 90112 events => throughput is 3.30E+02 events/s + [COUNTERS] PROGRAM TOTAL : 294.2131s + [COUNTERS] Fortran Overhead ( 0 ) : 24.5260s + [COUNTERS] CudaCpp MEs ( 2 ) : 269.6871s for 90112 events => throughput 
is 3.34E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.086618e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.118767e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.034848e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.099496e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 50.6747s - [COUNTERS] Fortran Overhead ( 0 ) : 24.5956s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.0791s for 8192 events => throughput is 3.14E+02 events/s + [COUNTERS] PROGRAM TOTAL : 49.6272s + [COUNTERS] Fortran Overhead ( 0 ) : 23.9735s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.6537s for 8192 events => throughput is 3.19E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 317.7787s - [COUNTERS] Fortran Overhead ( 0 ) : 28.6879s - [COUNTERS] CudaCpp MEs ( 2 ) : 289.0908s for 90112 events => throughput is 3.12E+02 events/s + [COUNTERS] PROGRAM TOTAL : 308.1385s + 
[COUNTERS] Fortran Overhead ( 0 ) : 28.1125s + [COUNTERS] CudaCpp MEs ( 2 ) : 280.0260s for 90112 events => throughput is 3.22E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.404102e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.494217e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.409784e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.498492e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 3.6204s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7568s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8636s for 8192 events => throughput is 9.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6127s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7479s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8648s for 8192 events => throughput is 9.47E+03 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 16.2929s - [COUNTERS] Fortran Overhead ( 0 ) : 6.7937s - [COUNTERS] CudaCpp MEs 
( 2 ) : 9.4992s for 90112 events => throughput is 9.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.2177s + [COUNTERS] Fortran Overhead ( 0 ) : 6.7356s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4821s for 90112 events => throughput is 9.50E+03 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.473549e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.422089e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.087340e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.074505e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.107601e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108350e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.158418e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.160591e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111818e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110190e+04 ) sec^-1 *** EXECUTE 
GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.107191e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113742e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111406e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.112799e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.647341e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.647292e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 759f07dd06..c909267a2d 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 + +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:46:12 +DATE: 2024-05-16_01:42:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4845s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4094s - [COUNTERS] Fortran MEs ( 1 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4817s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4067s + [COUNTERS] Fortran MEs ( 1 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 
[0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4152s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3402s - [COUNTERS] Fortran MEs ( 1 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4101s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3363s + [COUNTERS] Fortran MEs ( 1 ) : 0.0738s for 8192 events => throughput is 1.11E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4368s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6165s - [COUNTERS] Fortran MEs ( 1 ) : 0.8203s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3997s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5919s + [COUNTERS] Fortran MEs ( 1 ) : 0.8079s for 90112 events => throughput is 1.12E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.5011s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0810s for 8192 events => throughput is 1.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4922s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4133s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0789s for 8192 events => throughput is 1.04E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] 
Cross section = 0.2151 [0.21510686556561293] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.5731s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6791s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8940s for 90112 events => throughput is 1.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5284s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6490s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8794s for 90112 events => throughput is 1.02E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.029302e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.038604e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.033246e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.042402e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351262530] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4255s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3819s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 8192 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3775s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' 
./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.1141s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4826s for 90112 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0947s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6149s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4799s for 90112 events => throughput is 1.88E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.900971e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.949373e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.818132e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.919896e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3894s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3642s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0252s for 8192 events => throughput is 3.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3846s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3601s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.35E+05 events/s *** (2-avx2) 
Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8967s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6165s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2802s for 90112 events => throughput is 3.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8777s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6020s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2756s for 90112 events => throughput is 3.27E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.238269e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.197689e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.199395e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.341628e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3628s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3821s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3591s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8679s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6157s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2522s for 90112 events => throughput is 3.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8414s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5939s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2475s for 90112 events => throughput is 3.64E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.606523e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.543777e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.588739e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.667139e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4110s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3757s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0353s for 8192 events => throughput is 2.32E+05 events/s + 
[COUNTERS] PROGRAM TOTAL : 0.4059s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0340s for 8192 events => throughput is 2.41E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0223s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6326s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3897s for 90112 events => throughput is 2.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0051s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6219s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3832s for 90112 events => throughput is 2.35E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.343272e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.349902e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.366477e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.363260e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7764s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7757s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7762s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7756s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0365s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0284s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.11E+07 events/s + [COUNTERS] PROGRAM TOTAL : 2.0116s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0036s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 90112 events => throughput is 1.13E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.543217e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.582112e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.108201e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.121699e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.511173e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.529721e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.528369e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.531629e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.512246e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.538677e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.808064e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.807286e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** 
Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.513388e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.530045e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.778138e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.783374e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 286b8a0bd4..e05d3f7736 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -18,8 +18,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:46:42 +DATE: 2024-05-16_01:43:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4094s - [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4769s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4030s + [COUNTERS] Fortran MEs ( 1 ) : 0.0739s for 8192 events => throughput is 1.11E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4167s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s - [COUNTERS] Fortran MEs ( 1 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4080s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3346s + [COUNTERS] Fortran MEs ( 1 ) : 0.0734s for 8192 events => throughput is 1.12E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 
[0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4405s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6199s - [COUNTERS] Fortran MEs ( 1 ) : 0.8207s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4013s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5943s + [COUNTERS] Fortran MEs ( 1 ) : 0.8069s for 90112 events => throughput is 1.12E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110463093540638] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4910s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4146s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0764s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4876s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4127s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686273216112] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.5078s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6674s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8404s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4731s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6478s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8253s for 90112 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.092729e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110229e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.094826e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.114385e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110459152958460] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3939s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3669s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3862s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3596s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.08E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510683016166510] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9150s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6174s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2976s for 90112 events => throughput is 3.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8914s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5982s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2932s for 90112 events => throughput is 3.07E+05 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.038989e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.059471e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.060486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.086598e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3649s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3515s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3600s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3466s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.09E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7586s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6103s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1483s for 90112 events => throughput is 6.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7375s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5912s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.1463s for 90112 events => throughput is 6.16E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.085610e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.184642e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.998809e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.224019e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3637s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3513s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 8192 events => throughput is 6.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3591s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3468s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.64E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7434s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6053s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1381s for 90112 events => throughput is 6.52E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 1.7159s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1360s for 90112 events => throughput is 6.63E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.613064e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.683790e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.625420e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.800598e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110464176080312] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3755s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0178s for 8192 events => throughput is 4.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3734s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3559s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 8192 events => throughput is 4.70E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,143 +463,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510685411522326] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8107s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6141s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1966s for 90112 events => throughput is 4.58E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510685411522326) differ by less than 4E-4 (5.3231167917999755e-08) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.752745e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.780214e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110478167944563] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7737s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.56E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.27110539351263330) and cuda (0.27110478167944563) differ by less than 4E-4 (2.2568093527297606e-06) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510689885789414] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0355s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0291s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.42E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689885789414) differ by less than 4E-4 (1.547708907700951e-07) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.752013e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.369248e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.053096e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.714374e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.107820e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.803718e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.560411e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.935709e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** -TEST COMPLETED +Program received signal SIGFPE: Floating-point exception - erroneous arithmetic operation. + +Backtrace for this error: +#0 0x7f2a1a623860 in ??? +#1 0x7f2a1a622a05 in ??? +#2 0x7f2a1a254def in ??? +#3 0x7f2a1ae20acc in ??? +#4 0x7f2a1acc4575 in ??? +#5 0x7f2a1ae1d4c9 in ??? +#6 0x7f2a1ae2570d in ??? +#7 0x7f2a1ae2afa1 in ??? +#8 0x43008b in ??? +#9 0x431c10 in ??? +#10 0x432d47 in ??? +#11 0x433b1e in ??? +#12 0x44a921 in ??? +#13 0x42ebbf in ??? +#14 0x40371e in ??? +#15 0x7f2a1a23feaf in ??? +#16 0x7f2a1a23ff5f in ??? +#17 0x403844 in ??? +#18 0xffffffffffffffff in ??? +./madX.sh: line 379: 3004240 Floating point exception(core dumped) $timecmd $cmd < ${tmpin} > ${tmp} +ERROR! ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' failed + PDF set = nn23lo1 + alpha_s(Mz)= 0.1300 running at 2 loops. + alpha_s(Mz)= 0.1300 running at 2 loops. 
+ Renormalization scale set on event-by-event basis + Factorization scale set on event-by-event basis + + + getting user params +Enter number of events and max and min iterations: + Number of events and iterations 81920 1 1 diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index ca0042dad1..23f8d1233a 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-14_21:47:11 +DATE: 2024-05-16_01:43:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4845s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4096s - [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s + [COUNTERS] Fortran MEs ( 1 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4164s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3414s - [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4171s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3424s + [COUNTERS] Fortran MEs ( 1 ) : 0.0747s for 8192 events => throughput is 1.10E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 
2.4386s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6180s - [COUNTERS] Fortran MEs ( 1 ) : 0.8207s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4382s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6232s + [COUNTERS] Fortran MEs ( 1 ) : 0.8150s for 90112 events => throughput is 1.11E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4992s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4184s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0808s for 8192 events => throughput is 1.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5038s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4232s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 8192 events => throughput is 1.02E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560794337] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.5686s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6770s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8916s for 90112 events => throughput is 1.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5722s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6823s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8899s for 90112 events => throughput is 1.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.029942e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.030982e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.032493e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.031715e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4253s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3830s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0423s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4234s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3811s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560794334] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.1014s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6354s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4660s for 90112 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1135s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6453s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4682s for 90112 events => throughput is 1.92E+05 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.920832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.925046e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.926600e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.926756e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3906s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3655s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 8192 events => throughput is 3.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3913s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3660s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 8192 events => throughput is 3.24E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8987s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6198s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2789s for 90112 events => throughput is 3.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8962s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6193s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.2769s for 90112 events => throughput is 3.25E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.244752e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.266154e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.318560e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.269502e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3830s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3609s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0220s for 8192 events => throughput is 3.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3831s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3610s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8583s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6137s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2446s for 90112 events => throughput is 3.68E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 1.8418s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5992s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2426s for 90112 events => throughput is 3.71E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.659431e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.789065e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.741866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.854777e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,6 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 @@ -437,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4131s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3768s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4096s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3737s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0358s for 8192 events => throughput is 2.29E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +463,6 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 @@ -471,9 +471,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0378s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6340s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4039s for 90112 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0275s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6170s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4105s for 90112 events => throughput is 2.20E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.283252e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.335204e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.302923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.335548e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7774s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7767s + [COUNTERS] PROGRAM TOTAL : 0.7731s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7724s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0336s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0256s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 90112 events => throughput is 1.13E+07 events/s + [COUNTERS] PROGRAM TOTAL : 2.0091s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0010s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.12E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,42 +562,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.556024e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.632055e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.988898e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.046951e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.509950e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.534300e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.533082e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.533151e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.514260e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.529948e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.814949e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.832656e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** 
Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.510075e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.532773e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.782063e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.792603e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 43740fd349..7b51bb9221 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:05:41 +DATE: 2024-05-16_05:58:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9549s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9051s - [COUNTERS] Fortran MEs ( 1 ) : 0.0497s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9450s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8961s + [COUNTERS] Fortran MEs ( 1 ) : 0.0489s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4317s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3821s - [COUNTERS] Fortran MEs ( 1 ) : 0.0496s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4283s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3794s + [COUNTERS] Fortran MEs ( 1 ) : 0.0489s for 8192 events => throughput is 1.67E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8870s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3416s - [COUNTERS] Fortran MEs ( 1 ) : 0.5454s for 90112 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8593s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3225s + [COUNTERS] Fortran MEs ( 1 ) : 0.5368s for 90112 events 
=> throughput is 1.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4624s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4185s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0439s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8566s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3716s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4850s for 90112 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8316s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4781s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.913736e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935091e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.920291e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.955688e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.5374s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5126s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 8192 events => throughput is 3.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4208s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3966s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6257s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3531s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2727s for 90112 events => throughput is 3.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6048s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3350s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2698s for 90112 events => throughput is 3.34E+05 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.389926e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.396840e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.415683e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.435183e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4050s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3901s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4008s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3863s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.65E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5072s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3422s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1650s for 90112 events => throughput is 5.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5134s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3430s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.1705s for 90112 events => throughput is 5.29E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.625678e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.293625e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.528170e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.214602e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4043s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3906s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0137s for 8192 events => throughput is 5.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3999s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5003s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3490s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1513s for 90112 events => throughput is 5.96E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 1.4652s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1469s for 90112 events => throughput is 6.13E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.064246e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.964595e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.053452e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.873725e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4208s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3980s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4159s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3941s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6097s - [COUNTERS] Fortran Overhead ( 0 ) : 
1.3571s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2526s for 90112 events => throughput is 3.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5767s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3320s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2446s for 90112 events => throughput is 3.68E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.657278e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.692127e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.678863e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.804832e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,8 +513,8 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8151s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8145s + [COUNTERS] PROGRAM TOTAL : 0.8067s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8061s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.36E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7718s - [COUNTERS] Fortran Overhead ( 0 ) : 
1.7649s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.31E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7545s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 90112 events => throughput is 1.29E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.973752e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.804334e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.174363e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.230995e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.100865e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.136468e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.773387e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.810257e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.096045e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.115441e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.046124e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.048562e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.122711e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.115501e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.717733e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.748994e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 8866575613..d09b81d7d3 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/h make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone - -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:06:09 +DATE: 2024-05-16_05:59:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9537s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9040s - [COUNTERS] Fortran MEs ( 1 ) : 0.0497s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9433s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8939s + [COUNTERS] Fortran MEs ( 1 ) : 0.0495s for 8192 events => throughput is 1.66E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 
[XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4243s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3746s - [COUNTERS] Fortran MEs ( 1 ) : 0.0497s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4256s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3772s + [COUNTERS] Fortran MEs ( 1 ) : 0.0484s for 8192 events => throughput is 1.69E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8857s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3389s - [COUNTERS] Fortran MEs ( 1 ) : 0.5469s for 90112 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8568s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3199s + [COUNTERS] Fortran MEs ( 1 ) : 0.5369s for 90112 events => throughput is 1.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162897371946169] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1625 events) - [COUNTERS] PROGRAM TOTAL : 0.4592s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4173s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0419s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4525s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4113s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.99E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt 
b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 7a112b526f..291c38991b 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:06:15 +DATE: 2024-05-16_05:59:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9548s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9052s - [COUNTERS] Fortran MEs ( 1 ) : 0.0496s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9570s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9073s + [COUNTERS] Fortran MEs ( 1 ) : 0.0498s for 8192 events => throughput is 1.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4258s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3761s - [COUNTERS] Fortran MEs ( 1 ) : 0.0497s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 
0.4201s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3715s + [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8849s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3405s - [COUNTERS] Fortran MEs ( 1 ) : 0.5444s for 90112 events => throughput is 1.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8553s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3195s + [COUNTERS] Fortran MEs ( 1 ) : 0.5358s for 90112 events => throughput is 1.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4626s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4185s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4604s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4161s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0443s for 8192 events => throughput is 1.85E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -168,9 +168,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8596s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3731s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4865s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] 
PROGRAM TOTAL : 1.8327s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3534s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4793s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -184,13 +184,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.780391e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.817766e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.791166e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.799752e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,8 +214,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4249s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4000s + [COUNTERS] PROGRAM TOTAL : 0.4227s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3979s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.30E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -248,9 +248,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6282s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3511s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.2771s for 90112 events => throughput is 3.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6020s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3321s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2699s for 90112 events => throughput is 3.34E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -264,13 +264,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.181686e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.208317e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.208038e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.242147e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4056s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3902s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4018s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.35E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -328,9 +328,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] 
Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5122s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3434s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1688s for 90112 events => throughput is 5.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3319s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1679s for 90112 events => throughput is 5.37E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -344,13 +344,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.673120e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.916130e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.737995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.911254e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4033s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3893s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4012s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3876s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.01E+05 events/s *** (2-512y) 
Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -408,9 +408,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4998s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3458s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1539s for 90112 events => throughput is 5.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4747s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3226s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1520s for 90112 events => throughput is 5.93E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -424,13 +424,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.111638e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.307270e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.181877e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.162265e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4235s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3998s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.46E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 0.4179s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3948s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.55E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -488,9 +488,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6230s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3623s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2607s for 90112 events => throughput is 3.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5949s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3393s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2556s for 90112 events => throughput is 3.53E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -504,13 +504,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.284728e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.332654e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.311060e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.349365e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -533,9 +533,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1 [UNWEIGHT] Wrote 
1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8168s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8162s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.39E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8124s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8118s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.37E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -566,9 +566,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7735s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7667s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.32E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7514s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7444s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 90112 events => throughput is 1.29E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -581,42 +581,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.993711e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.815756e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.200906e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.247313e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.104870e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.109600e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.706347e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.657132e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.113512e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.111219e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.043454e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.039865e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 
--bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.115682e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.111200e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.706517e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.771983e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 363e482fee..80269e77b1 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppsse4 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - make 
USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:07:14 +DATE: 2024-05-16_06:00:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.7125s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3468s - [COUNTERS] Fortran MEs ( 1 ) : 2.3657s for 8192 events => throughput is 3.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6807s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3392s + [COUNTERS] Fortran MEs ( 1 ) : 2.3415s for 8192 events => throughput is 3.50E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.7089s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3433s - [COUNTERS] Fortran MEs ( 1 ) : 2.3656s for 8192 events => throughput is 3.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 
2.6731s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3387s + [COUNTERS] Fortran MEs ( 1 ) : 2.3344s for 8192 events => throughput is 3.51E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 27.8837s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8377s - [COUNTERS] Fortran MEs ( 1 ) : 26.0460s for 90112 events => throughput is 3.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.7347s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8315s + [COUNTERS] Fortran MEs ( 1 ) : 25.9031s for 90112 events => throughput is 3.48E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.3314s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7867s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5447s for 8192 events => throughput is 3.22E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.3198s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7672s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5526s for 8192 events => throughput is 3.21E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 32.2519s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2639s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.9881s for 90112 events => throughput 
is 3.22E+03 events/s + [COUNTERS] PROGRAM TOTAL : 31.9550s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2173s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7377s for 90112 events => throughput is 3.25E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.358191e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.410579e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.362821e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.412797e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.9787s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6481s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3306s for 8192 events => throughput is 6.16E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.9225s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6149s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3076s for 8192 events => throughput is 6.27E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 
events) - [COUNTERS] PROGRAM TOTAL : 17.7399s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1202s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.6196s for 90112 events => throughput is 6.16E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.4719s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0716s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.4004s for 90112 events => throughput is 6.26E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.391673e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.486190e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.406849e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.480385e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.5110s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9206s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5903s for 8192 events => throughput is 1.39E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4971s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9161s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5810s for 8192 events => throughput is 1.41E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.9045s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3985s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.5060s for 90112 events => throughput is 1.39E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.7948s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3698s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.4250s for 90112 events => throughput is 1.40E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.429892e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.446474e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.429297e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.453779e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.3737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8523s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5214s for 8192 events => throughput is 1.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3443s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8339s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5104s for 8192 events => throughput is 
1.60E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.0618s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3228s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7390s for 90112 events => throughput is 1.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.0840s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3189s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7651s for 90112 events => throughput is 1.56E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.631894e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.657111e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.630679e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.653778e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.7374s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0390s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6983s for 8192 events => throughput is 1.17E+04 events/s + 
[COUNTERS] PROGRAM TOTAL : 1.7114s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0187s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6926s for 8192 events => throughput is 1.18E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 10.1862s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5119s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.6743s for 90112 events => throughput is 1.17E+04 events/s + [COUNTERS] PROGRAM TOTAL : 10.1275s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5284s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.5992s for 90112 events => throughput is 1.19E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.188749e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.225114e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.192286e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.220944e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] 
PROGRAM TOTAL : 0.8347s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8176s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8326s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8156s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.80E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.4695s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1893s for 90112 events => throughput is 4.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4753s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1885s for 90112 events => throughput is 4.78E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.844936e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.843300e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.235132e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.218089e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.142804e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.155842e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.412161e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.421655e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.141114e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.190444e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.415015e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.415946e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.148097e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.148629e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.763971e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.764175e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 8c90f7d65e..45b154f6da 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:09:53 +DATE: 2024-05-16_06:02:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' 
./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.7125s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3447s - [COUNTERS] Fortran MEs ( 1 ) : 2.3677s for 8192 events => throughput is 3.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6694s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s + [COUNTERS] Fortran MEs ( 1 ) : 2.3294s for 8192 events => throughput is 3.52E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.7098s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3423s - [COUNTERS] Fortran MEs ( 1 ) : 2.3675s for 8192 events => throughput is 3.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3390s + [COUNTERS] Fortran MEs ( 1 ) : 2.3358s for 8192 events => throughput is 3.51E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 27.8796s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8397s - [COUNTERS] Fortran MEs ( 1 ) : 26.0399s for 90112 events => throughput is 3.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.4771s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8059s + [COUNTERS] Fortran MEs ( 1 ) : 25.6712s for 90112 events => throughput is 3.51E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** 
-------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896785213255034E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.1560s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7117s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4442s for 8192 events => throughput is 3.35E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0871s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6795s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4076s for 8192 events => throughput is 3.40E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668138359550833E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 31.0848s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1980s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.8868s for 90112 events => throughput is 3.35E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.6384s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1272s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.5112s for 90112 events => throughput is 3.40E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.468936e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.518646e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.470461e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519140e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.7104s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0198s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6906s for 8192 events => throughput is 1.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.6928s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0131s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6797s for 8192 events => throughput is 1.21E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 10.0856s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4949s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.5907s for 90112 events => throughput is 1.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.9205s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4645s + [COUNTERS] CudaCpp MEs ( 2 ) : 
7.4560s for 90112 events => throughput is 1.21E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.208989e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.232964e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.217159e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.231409e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9411s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6390s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3021s for 8192 events => throughput is 2.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9294s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6306s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2988s for 8192 events => throughput is 2.74E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.4424s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1114s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.3310s for 90112 
events => throughput is 2.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3691s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0900s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.2791s for 90112 events => throughput is 2.75E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.798166e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.797581e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.773514e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.799365e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8751s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6051s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2700s for 8192 events => throughput is 3.03E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5958s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2624s for 8192 events => throughput is 3.12E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 
events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.0437s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0874s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.9562s for 90112 events => throughput is 3.05E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9443s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0456s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8987s for 90112 events => throughput is 3.11E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.174347e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.232867e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.165933e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.231460e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.0418s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6907s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3511s for 8192 events => throughput is 2.33E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0285s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3457s for 8192 events => throughput is 2.37E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp 
< /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 6.0003s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1658s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8345s for 90112 events => throughput is 2.35E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.9990s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1683s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.8307s for 90112 events => throughput is 2.35E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.388563e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.399591e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.384278e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.392653e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896805369365078E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8239s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8100s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8136s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 
5.86E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668194616292154E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.4314s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2776s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1538s for 90112 events => throughput is 5.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4309s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2769s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1540s for 90112 events => throughput is 5.85E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.225876e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.229528e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.504759e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.512458e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.365193e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.376887e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.375280e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.385341e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.353719e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.353397e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.385176e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.396782e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.374018e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.372639e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.819946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.860961e+05 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index c4e3a5b9c2..66daeb0e97 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:12:03 +DATE: 2024-05-16_06:05:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.7093s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s - [COUNTERS] Fortran MEs ( 1 ) : 2.3655s for 8192 events => throughput is 3.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7059s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3465s + [COUNTERS] Fortran MEs ( 1 ) : 2.3593s for 8192 events => throughput is 3.47E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.7079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3415s - [COUNTERS] Fortran MEs ( 1 ) : 2.3664s for 8192 events => throughput is 3.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7035s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s + [COUNTERS] Fortran MEs ( 1 ) : 2.3597s for 8192 events => throughput is 3.47E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - 
[COUNTERS] PROGRAM TOTAL : 27.8728s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8352s - [COUNTERS] Fortran MEs ( 1 ) : 26.0376s for 90112 events => throughput is 3.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.5770s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8193s + [COUNTERS] Fortran MEs ( 1 ) : 25.7577s for 90112 events => throughput is 3.50E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.3771s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8109s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5662s for 8192 events => throughput is 3.19E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.3005s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7718s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5286s for 8192 events => throughput is 3.24E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 32.7261s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2892s - [COUNTERS] CudaCpp MEs ( 2 ) : 28.4369s for 90112 events => throughput is 3.17E+03 events/s + [COUNTERS] PROGRAM TOTAL : 31.9857s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2191s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7666s for 90112 events => throughput is 3.25E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.328150e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.386546e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.339265e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397002e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.9285s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6200s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3084s for 8192 events => throughput is 6.26E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.8744s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5934s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2810s for 8192 events => throughput is 6.40E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 17.4683s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0870s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.3813s for 90112 events => throughput is 6.27E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.1795s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0414s + [COUNTERS] CudaCpp MEs ( 2 ) : 
14.1381s for 90112 events => throughput is 6.37E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.594909e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.678262e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.618255e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.743588e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.5026s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9158s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5869s for 8192 events => throughput is 1.40E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9015s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5764s for 8192 events => throughput is 1.42E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.8525s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3961s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.4564s for 90112 
events => throughput is 1.40E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.7035s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3540s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.3495s for 90112 events => throughput is 1.42E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.440340e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.454724e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.445000e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.476512e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.3614s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8452s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5162s for 8192 events => throughput is 1.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3378s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8314s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5064s for 8192 events => throughput is 1.62E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 
events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.9985s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3165s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.6819s for 90112 events => throughput is 1.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.8877s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3042s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5835s for 90112 events => throughput is 1.61E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.641936e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.679497e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.647430e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.670221e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.7613s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0489s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7124s for 8192 events => throughput is 1.15E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.7201s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0244s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6957s for 8192 events => throughput is 1.18E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp 
< /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 10.3619s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5262s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.8357s for 90112 events => throughput is 1.15E+04 events/s + [COUNTERS] PROGRAM TOTAL : 10.1095s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4855s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.6240s for 90112 events => throughput is 1.18E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.168837e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204025e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.170011e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.203510e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8333s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8161s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8365s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8192s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 
4.75E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.4751s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2851s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1899s for 90112 events => throughput is 4.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4662s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2768s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1894s for 90112 events => throughput is 4.76E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.824575e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814879e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.192856e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.185918e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.152345e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.154361e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.384307e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.382253e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.153285e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.160102e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.379355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.387193e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.102798e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.108981e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.746210e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.750323e+05 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 7ab99ff224..059122dda6 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - make USEBUILDDIR=1 BACKEND=cuda + + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:07:00 +DATE: 2024-05-16_06:00:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4152s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4058s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4148s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4054s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3215s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3121s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3154s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3059s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM 
TOTAL : 1.3987s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2968s - [COUNTERS] Fortran MEs ( 1 ) : 0.1019s for 90112 events => throughput is 8.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3773s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2771s + [COUNTERS] Fortran MEs ( 1 ) : 0.1002s for 90112 events => throughput is 8.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 409bad5208..01167da954 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:07:05 +DATE: 2024-05-16_06:00:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4157s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4095s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4004s + [COUNTERS] Fortran MEs ( 1 ) : 0.0092s for 8192 events => throughput is 8.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3201s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3107s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3199s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3104s + [COUNTERS] Fortran MEs ( 1 ) : 0.0095s for 8192 events => throughput is 8.61E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create 
events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.4023s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2985s - [COUNTERS] Fortran MEs ( 1 ) : 0.1038s for 90112 events => throughput is 8.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3992s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2973s + [COUNTERS] Fortran MEs ( 1 ) : 0.1018s for 90112 events => throughput is 8.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index ab75757909..6c876298cd 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - - make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:07:09 +DATE: 2024-05-16_06:00:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4151s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4057s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4113s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4020s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3196s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3102s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3138s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3043s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.67E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.4054s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3024s - [COUNTERS] Fortran MEs ( 1 ) : 0.1030s for 90112 events => throughput is 8.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3758s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2758s + [COUNTERS] Fortran MEs ( 1 ) : 0.1001s for 90112 
events => throughput is 9.01E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 7f72709307..fd24a61552 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:06:42 +DATE: 2024-05-16_05:59:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8319s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7874s - [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8237s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7798s + [COUNTERS] Fortran MEs ( 1 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4226s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3781s - [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4191s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s + [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 
1.8349s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3481s - [COUNTERS] Fortran MEs ( 1 ) : 0.4868s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8130s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3278s + [COUNTERS] Fortran MEs ( 1 ) : 0.4852s for 90112 events => throughput is 1.86E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 171.8 [171.81273026311101] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.7087s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6687s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7007s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0395s for 8192 events => throughput is 2.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index a4e4ae047f..293718b73f 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx + make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:06:48 +DATE: 2024-05-16_05:59:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] 
[GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8388s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7944s - [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8342s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7896s + [COUNTERS] Fortran MEs ( 1 ) : 0.0446s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4222s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3777s - [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4178s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3733s + [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.8423s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3553s - [COUNTERS] Fortran MEs ( 1 ) : 0.4870s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8125s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3299s + 
[COUNTERS] Fortran MEs ( 1 ) : 0.4826s for 90112 events => throughput is 1.87E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 171.8 [171.81270286137041] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.7160s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6786s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0374s for 8192 events => throughput is 2.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7025s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6657s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0367s for 8192 events => throughput is 2.23E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index ff6733802e..f9ac9cdc3d 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-15_02:06:54 +DATE: 2024-05-16_05:59:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8341s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7896s - [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8401s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7957s + [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4214s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3769s - [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4268s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3820s + [COUNTERS] Fortran MEs ( 1 ) : 0.0448s for 8192 events => throughput is 1.83E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.8373s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3500s - [COUNTERS] Fortran MEs ( 1 ) : 0.4873s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8476s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3594s + [COUNTERS] Fortran MEs ( 1 ) : 0.4881s for 90112 events => 
throughput is 1.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 171.8 [171.81273490068889] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.7153s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6748s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7032s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6624s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** From 510decda32b6742fca61d783cda8ed53328aff65 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 16 May 2024 13:46:25 +0200 Subject: [PATCH 36/48] [jtmk2] rerun tmad gqttq test, the FPE #845 disappears, I will close it as not reproducible ./tmad/teeMadX.sh -gqttq +10x -fltonly -makeclean --- .../log_gqttq_mad_f_inl0_hrd0.txt | 265 ++++++++++++------ 1 file changed, 185 insertions(+), 80 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index e05d3f7736..4ac5ec3dc1 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-16_01:43:08 +DATE: 2024-05-16_13:45:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4030s - [COUNTERS] Fortran MEs ( 1 ) : 0.0739s for 8192 events => throughput is 1.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4107s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4080s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3346s - [COUNTERS] Fortran MEs ( 1 ) : 0.0734s for 8192 events => throughput 
is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4161s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4013s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5943s - [COUNTERS] Fortran MEs ( 1 ) : 0.8069s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4361s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6155s + [COUNTERS] Fortran MEs ( 1 ) : 0.8206s for 90112 events => throughput is 1.10E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110463093540638] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4876s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4127s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4907s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4145s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0762s for 8192 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686273216112] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4731s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6478s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8253s for 
90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5059s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6664s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8394s for 90112 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110229e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092440e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.114385e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.094603e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110459152958460] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3862s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3596s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3934s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510683016166510] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8914s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.5982s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2932s for 90112 events => throughput is 3.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9232s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6228s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3004s for 90112 events => throughput is 3.00E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.059471e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.039638e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.086598e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.034015e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3600s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3466s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3713s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.04E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 
[UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7375s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5912s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1463s for 90112 events => throughput is 6.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7502s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6011s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1490s for 90112 events => throughput is 6.05E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.184642e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.923571e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.224019e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.853443e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3591s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3468s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3638s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3514s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 8192 events => throughput is 6.61E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 
[XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7159s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5799s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1360s for 90112 events => throughput is 6.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7444s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6060s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1385s for 90112 events => throughput is 6.51E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.683790e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.334346e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.800598e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.476144e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110464176080312] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3734s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3559s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 8192 events => throughput is 4.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3762s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3583s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0178s for 8192 events => throughput is 4.59E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,38 
+463,143 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510685411522326] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.8096s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1967s for 90112 events => throughput is 4.58E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510685411522326) differ by less than 4E-4 (5.3231167917999755e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.750606e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.766894e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110478167944563] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7793s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.48E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cuda (0.27110478167944563) differ by less than 4E-4 (2.2568093527297606e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510689885789414] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0413s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0348s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689885789414) differ by less than 4E-4 (1.547708907700951e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.566939e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.326602e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.593547e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.720103e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.619232e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.806222e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.144615e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.016256e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** -Program received signal SIGFPE: Floating-point exception - erroneous arithmetic operation. - -Backtrace for this error: -#0 0x7f2a1a623860 in ??? -#1 0x7f2a1a622a05 in ??? -#2 0x7f2a1a254def in ??? -#3 0x7f2a1ae20acc in ??? -#4 0x7f2a1acc4575 in ??? -#5 0x7f2a1ae1d4c9 in ??? -#6 0x7f2a1ae2570d in ??? -#7 0x7f2a1ae2afa1 in ??? -#8 0x43008b in ??? -#9 0x431c10 in ??? -#10 0x432d47 in ??? -#11 0x433b1e in ??? -#12 0x44a921 in ??? -#13 0x42ebbf in ??? -#14 0x40371e in ??? -#15 0x7f2a1a23feaf in ??? -#16 0x7f2a1a23ff5f in ??? -#17 0x403844 in ??? -#18 0xffffffffffffffff in ??? -./madX.sh: line 379: 3004240 Floating point exception(core dumped) $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. 
- Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 81920 1 1 +TEST COMPLETED From c73b7190d4780da1f8a304f62f406f385cd6fc33 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 16 May 2024 14:11:41 +0200 Subject: [PATCH 37/48] [jtmk2] additional bug fixes in tput/throughputX.sh, remove the last pending gcheck.exe --- epochX/cudacpp/tput/throughputX.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index 68b06b6bd6..45c888b0e1 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -637,7 +637,7 @@ for exe in $exes; do if [ "${exe/build.512y}" != "${exe}" ]; then echo "$exe is not supported (no avx512vl in /proc/cpuinfo)"; continue; fi if [ "${exe/build.512z}" != "${exe}" ]; then echo "$exe is not supported (no avx512vl in /proc/cpuinfo)"; continue; fi fi - if [ "${exe%%/gcheck*}" != "${exe}" ] && [ "$gpuTxt" == "none" ]; then continue; fi + if [[ "${exe%%/check_cuda*}" != "${exe}" || "${exe%%/check_hip*}" != "${exe}" ]] && [ "$gpuTxt" == "none" ]; then pattern="${pattern}|EvtsPerSec\[Matrix"; fi if [ "${exe%%/heft_gg_bb*}" != "${exe}" ]; then # For heftggbb, use the same settings as for ggtt exeArgs="-p 2048 256 2" @@ -697,7 +697,7 @@ for exe in $exes; do cd $exeDir/.. 
# workaround for reading '../../Cards/param_card.dat' without setting MG5AMC_CARD_PATH unset OMP_NUM_THREADS runExe $exe "$exeArgs" - if [ "${exe%%/check*}" != "${exe}" ]; then + if [ "${exe%%/check_cpp*}" != "${exe}" ]; then if [ "${maketype}" != "-dryrun" ]; then obj=${exe%%.exe}; obj=${obj/check/CPPProcess}.o; $scrdir/simdSymSummary.sh -stripdir ${obj} -dumptotmp # comment out -dumptotmp to keep full objdump fi @@ -707,7 +707,7 @@ for exe in $exes; do runExe $exe "$exeArgs" unset OMP_NUM_THREADS fi - elif [ "${exe%%/gcheck*}" != "${exe}" ] || [ "${exe%%/alpcheck*}" != "${exe}" ]; then + elif [[ "${exe%%/check_cuda*}" != "${exe}" || "${exe%%/check_hip*}" != "${exe}" ]] || [ "${exe%%/alpcheck*}" != "${exe}" ]; then runNcu $exe "$ncuArgs" if [ "${div}" == "1" ]; then runNcuDiv $exe; fi if [ "${req}" == "1" ]; then runNcuReq $exe "$ncuArgs"; fi From 2fdea8fe52547a0484e91f94af6ff7af89200697 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 16 May 2024 14:23:40 +0200 Subject: [PATCH 38/48] [jtmk2] in tput/throughputX.sh, improve '---' and '===' separators (use the latter between any two different backends including different SIMDs) --- epochX/cudacpp/tput/throughputX.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index 45c888b0e1..9cbfd9d74f 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -614,14 +614,17 @@ echo -e "On $HOSTNAME [CPU: $cpuTxt] [GPU: $gpuTxt]:" BMKEXEARGS="" # if BMKEXEARGS is set, exeArgs is set equal to BMKEXEARGS, while exeArgs2 is set to "" BMKMULTIPLIER=1 # the pre-defined numbers of iterations (including those in BMKEXEARGS) are multiplied by BMKMULTIPLIER -lastExe= +###lastExe= +lastExeDir= ###echo "exes=$exes" for exe in $exes; do ###echo EXE=$exe; continue exeArgs2="" - if [ "$(basename $exe)" != "$lastExe" ]; then + ###if [ "$(basename $exe)" != "$lastExe" ]; then + if [ "$(basename 
$(dirname $exe))" != "$lastExeDir" ]; then echo "=========================================================================" - lastExe=$(basename $exe) + ###lastExe=$(basename $exe) + lastExeDir=$(basename $(dirname $exe)) else echo "-------------------------------------------------------------------------" fi From 5fec65cc9a8df54bbbb119d22af9fdbe1d33024e Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 16 May 2024 16:05:48 +0200 Subject: [PATCH 39/48] [jtmk2] rerun 102 tput tests on itscrd90 - after fixing various issues in the scripts (This took shorter than usual as the builds were cached from a previous test) STARTED AT Thu May 16 02:28:01 PM CEST 2024 ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean ENDED(1) AT Thu May 16 02:48:58 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) AT Thu May 16 02:57:13 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Thu May 16 03:05:25 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Thu May 16 03:08:10 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ENDED(5) AT Thu May 16 03:10:53 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common ENDED(6) AT Thu May 16 03:13:40 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -mix -hrd -makej -susyggtt -susyggt1t1 -smeftggtttt -heftggbb -makeclean ENDED(7) AT Thu May 16 03:22:57 PM CEST 2024 [Status=0] --- .../log_eemumu_mad_d_inl0_hrd0.txt | 158 ++++++++-------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 158 ++++++++-------- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 158 ++++++++-------- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 158 ++++++++-------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 158 ++++++++-------- .../log_eemumu_mad_d_inl0_hrd1.txt | 158 
++++++++-------- .../log_eemumu_mad_d_inl1_hrd0.txt | 158 ++++++++-------- .../log_eemumu_mad_d_inl1_hrd1.txt | 158 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd0.txt | 158 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 158 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 158 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 158 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 158 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd1.txt | 158 ++++++++-------- .../log_eemumu_mad_f_inl1_hrd0.txt | 158 ++++++++-------- .../log_eemumu_mad_f_inl1_hrd1.txt | 158 ++++++++-------- .../log_eemumu_mad_m_inl0_hrd0.txt | 158 ++++++++-------- .../log_eemumu_mad_m_inl0_hrd1.txt | 158 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0.txt | 158 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 158 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 158 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 158 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 158 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd1.txt | 158 ++++++++-------- .../log_ggtt_mad_d_inl1_hrd0.txt | 158 ++++++++-------- .../log_ggtt_mad_d_inl1_hrd1.txt | 158 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0.txt | 158 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 158 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 158 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 158 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 158 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd1.txt | 158 ++++++++-------- .../log_ggtt_mad_f_inl1_hrd0.txt | 158 ++++++++-------- .../log_ggtt_mad_f_inl1_hrd1.txt | 158 ++++++++-------- .../log_ggtt_mad_m_inl0_hrd0.txt | 158 ++++++++-------- .../log_ggtt_mad_m_inl0_hrd1.txt | 158 ++++++++-------- .../log_ggttg_mad_d_inl0_hrd0.txt | 174 +++++++++--------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 174 +++++++++--------- .../log_ggttg_mad_d_inl0_hrd1.txt | 174 +++++++++--------- 
.../log_ggttg_mad_f_inl0_hrd0.txt | 174 +++++++++--------- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 174 +++++++++--------- .../log_ggttg_mad_f_inl0_hrd1.txt | 174 +++++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 174 +++++++++--------- .../log_ggttg_mad_m_inl0_hrd1.txt | 174 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 174 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 174 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 174 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 174 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 174 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 174 +++++++++--------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 174 +++++++++--------- .../log_ggttgg_mad_d_inl1_hrd1.txt | 174 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 174 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 174 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 174 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 174 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 174 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 174 +++++++++--------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 174 +++++++++--------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 174 +++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 174 +++++++++--------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 174 +++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 174 +++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 174 +++++++++--------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 174 +++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 174 +++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 174 +++++++++--------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 174 +++++++++--------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 174 +++++++++--------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 174 +++++++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 174 +++++++++--------- 
.../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 174 +++++++++--------- .../log_gqttq_mad_d_inl0_hrd1.txt | 174 +++++++++--------- .../log_gqttq_mad_f_inl0_hrd0.txt | 174 +++++++++--------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 174 +++++++++--------- .../log_gqttq_mad_f_inl0_hrd1.txt | 174 +++++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 174 +++++++++--------- .../log_gqttq_mad_m_inl0_hrd1.txt | 174 +++++++++--------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 158 ++++++++-------- .../log_heftggbb_mad_d_inl0_hrd1.txt | 158 ++++++++-------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 158 ++++++++-------- .../log_heftggbb_mad_f_inl0_hrd1.txt | 158 ++++++++-------- .../log_heftggbb_mad_m_inl0_hrd0.txt | 158 ++++++++-------- .../log_heftggbb_mad_m_inl0_hrd1.txt | 158 ++++++++-------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 174 +++++++++--------- .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 174 +++++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 174 +++++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 174 +++++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 174 +++++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 174 +++++++++--------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 158 ++++++++-------- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 158 ++++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 158 ++++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 158 ++++++++-------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 158 ++++++++-------- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 158 ++++++++-------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 158 ++++++++-------- .../log_susyggtt_mad_d_inl0_hrd1.txt | 158 ++++++++-------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 158 ++++++++-------- .../log_susyggtt_mad_f_inl0_hrd1.txt | 158 ++++++++-------- .../log_susyggtt_mad_m_inl0_hrd0.txt | 158 ++++++++-------- .../log_susyggtt_mad_m_inl0_hrd1.txt | 158 ++++++++-------- 102 files changed, 8442 insertions(+), 8442 deletions(-) diff --git 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 013110c0dd..8f9275e4c9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_08:50:09 +DATE: 2024-05-16_14:32:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.805405e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.952080e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.190913e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.832141e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.963737e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.194654e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.672339 sec +TOTAL : 0.663591 sec INFO: No Floating Point Exceptions have been reported - 2,542,567,494 cycles # 2.815 GHz - 3,941,097,411 instructions # 1.55 insn 
per cycle - 0.969294589 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,552,888,166 cycles # 2.852 GHz + 3,988,214,096 instructions # 1.56 insn per cycle + 0.955666761 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.054726e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.237655e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.237655e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.053499e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.235255e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235255e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.385002 sec +TOTAL : 6.389071 sec INFO: No Floating Point Exceptions have been reported - 18,269,202,165 cycles # 2.859 GHz - 43,967,069,972 instructions # 2.41 insn per cycle - 6.390334100 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 18,286,976,651 cycles # 2.860 GHz + 43,966,894,713 instructions # 2.40 insn per cycle + 6.394304827 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) 
(512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.552394e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.025542e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.025542e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.575090e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.062276e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.062276e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.476521 sec +TOTAL : 4.417510 sec INFO: No Floating Point Exceptions have been reported - 12,728,052,416 cycles # 2.841 GHz - 30,998,789,943 instructions # 2.44 insn per cycle - 4.481749609 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) + 12,744,894,916 cycles # 2.883 GHz + 31,001,019,523 instructions # 2.43 insn per cycle + 4.422588286 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.855020e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.546950e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.546950e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.946862e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.717949e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 2.717949e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.813175 sec +TOTAL : 3.650052 sec INFO: No Floating Point Exceptions have been reported - 10,046,089,223 cycles # 2.632 GHz - 19,366,150,955 instructions # 1.93 insn per cycle - 3.818350781 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) + 10,049,299,034 cycles # 2.750 GHz + 19,366,983,583 instructions # 1.93 insn per cycle + 3.655131055 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.996112e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.810288e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.810288e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.022084e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.853820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.853820e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.569526 sec +TOTAL : 3.526182 sec INFO: No Floating Point Exceptions have been reported - 9,728,622,739 cycles # 2.723 GHz - 18,978,342,140 instructions # 1.95 insn per cycle - 3.574740866 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) + 9,727,313,452 cycles # 2.755 GHz + 18,976,774,064 instructions # 1.95 insn per cycle + 3.531366474 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.669755e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.196947e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.196947e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.695166e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.233065e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 2.233065e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.188754 sec +TOTAL : 4.130967 sec INFO: No Floating Point Exceptions have been reported - 8,562,533,600 cycles # 2.042 GHz - 15,730,003,481 instructions # 1.84 insn per cycle - 4.193992786 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) + 8,580,931,991 cycles # 2.075 GHz + 15,727,945,386 instructions # 1.83 insn per cycle + 4.136130895 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index cf13a69eb9..556a164c58 100644 --- 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:17:38 +DATE: 2024-05-16_14:59:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.463666e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.527720e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.527720e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.482485e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.592798e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.592798e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.331144 sec +TOTAL : 2.321030 sec INFO: No Floating Point Exceptions have been reported - 7,322,660,282 cycles # 2.842 GHz - 13,188,655,592 instructions # 1.80 insn per cycle - 2.634504246 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge + 7,329,722,152 cycles # 2.840 GHz + 13,178,162,400 instructions # 1.80 insn per cycle + 2.637544426 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -70,19 +70,19 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -90,28 +90,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.018307e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.187576e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.187576e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.015910e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.185158e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185158e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.804531 sec +TOTAL : 6.819356 sec INFO: No Floating Point Exceptions have been reported - 19,459,627,277 cycles # 2.858 GHz - 44,193,955,664 instructions # 2.27 insn per cycle - 6.811255280 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 19,469,024,508 cycles # 2.853 GHz + 44,194,459,972 instructions # 2.27 insn per cycle + 6.826135735 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -119,28 +119,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.482019e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.908717e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.908717e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.484528e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.911785e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.911785e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.879908 sec +TOTAL : 4.870031 sec INFO: No Floating Point Exceptions have been reported - 13,957,313,163 cycles # 2.857 GHz - 31,844,057,421 instructions # 2.28 insn per cycle - 4.886813449 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) + 13,928,289,994 cycles # 2.857 GHz + 31,840,505,402 instructions # 2.29 insn per cycle + 4.876819018 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -148,28 +148,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.812993e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.461899e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.461899e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.770728e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.386706e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.386706e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.110424 sec +TOTAL : 4.194561 sec INFO: No Floating Point Exceptions have been reported - 11,257,664,295 cycles # 2.735 GHz - 20,727,911,170 instructions # 1.84 insn per cycle - 4.117109353 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) + 11,361,909,372 cycles # 2.705 GHz + 20,728,193,515 instructions # 1.82 insn per cycle + 4.201564491 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -177,28 +177,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.869279e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.573803e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.573803e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.837895e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.511052e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.511052e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.004022 sec +TOTAL : 4.060412 sec INFO: No Floating Point Exceptions have been reported - 10,948,992,779 cycles # 2.731 GHz - 20,338,868,359 instructions # 1.86 insn per cycle - 4.010773587 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) + 10,967,372,142 cycles # 2.697 GHz + 20,348,024,135 instructions # 1.86 insn per cycle + 4.067336299 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -206,22 +206,22 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.576100e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.035998e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.035998e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.574771e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.036722e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.036722e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.638455 sec +TOTAL : 4.642745 sec INFO: No Floating Point Exceptions have been reported - 9,862,529,153 cycles # 2.123 GHz - 16,873,171,812 instructions # 1.71 insn per cycle - 4.645277576 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) + 9,876,735,144 cycles # 2.125 GHz + 16,873,564,045 instructions # 1.71 insn per cycle + 4.649693422 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index e8daf97743..752636bf13 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:28:43 +DATE: 2024-05-16_15:10:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.531146e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.607248e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.140926e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.514734e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.592124e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.118434e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.379056 sec +TOTAL : 1.385277 sec INFO: No Floating Point Exceptions have been reported - 4,577,843,489 cycles # 2.841 GHz - 7,096,304,812 instructions # 1.55 insn per cycle - 1.668143278 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common + 4,585,820,337 cycles # 2.836 GHz + 7,177,605,134 instructions # 1.57 insn per cycle + 1.675534023 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.053954e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.236082e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.236082e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.053610e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.236408e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236408e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.760962 sec +TOTAL : 6.764205 sec INFO: No Floating Point Exceptions have been reported - 19,360,467,507 cycles # 2.862 GHz - 44,070,598,018 instructions # 2.28 insn per cycle - 6.766561857 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 19,353,708,257 cycles # 2.859 GHz + 44,070,957,602 instructions # 2.28 insn per cycle + 6.769682162 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ 
PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.564029e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.045025e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.045025e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.538542e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.015186e+06 
) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.015186e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.813937 sec +TOTAL : 4.882143 sec INFO: No Floating Point Exceptions have been reported - 13,789,517,550 cycles # 2.862 GHz - 31,001,132,263 instructions # 2.25 insn per cycle - 4.819327883 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) + 13,858,733,837 cycles # 2.836 GHz + 31,001,638,282 instructions # 2.24 insn per cycle + 4.887574523 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.923166e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.678985e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.678985e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.916460e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.668694e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.668694e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.064292 sec +TOTAL : 4.073472 sec INFO: No Floating Point Exceptions have been reported - 11,129,699,703 cycles # 2.736 GHz - 19,270,255,649 instructions # 1.73 insn per cycle - 4.069782670 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) + 11,156,313,503 cycles # 2.736 GHz + 19,267,334,271 instructions # 1.73 insn per cycle + 4.078862770 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.996543e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.814111e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.814111e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.001009e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.815270e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.815270e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.949101 sec +TOTAL : 3.939832 sec INFO: No Floating Point Exceptions have been reported - 10,841,394,049 cycles # 2.742 GHz - 18,690,520,828 instructions # 1.72 insn per cycle - 3.954645583 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) + 10,815,664,939 cycles # 2.742 GHz + 18,691,798,772 instructions # 1.73 insn per cycle + 3.945208768 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.670627e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.195421e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.195421e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.659550e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.188571e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.188571e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.567011 sec +TOTAL : 4.600299 sec INFO: No Floating Point Exceptions have been reported - 9,663,502,451 cycles # 2.114 GHz - 15,431,673,204 instructions # 1.60 insn per cycle - 4.572422830 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) + 9,711,022,403 cycles # 2.112 GHz + 15,432,876,214 instructions # 1.59 insn per cycle + 4.605929662 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 6d168a519f..55f8e65d60 100644 --- 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:26:00 +DATE: 2024-05-16_15:08:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.533170e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.604050e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.151992e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.528786e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.598045e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.123971e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.007981 sec +TOTAL : 1.009724 sec INFO: No Floating Point Exceptions have been reported - 3,512,519,696 cycles # 2.832 GHz - 7,067,616,019 instructions # 2.01 insn per cycle - 1.297359256 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst + 3,524,520,771 cycles # 2.832 GHz + 6,986,852,382 instructions # 1.98 insn per cycle + 1.303213412 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.055935e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.238269e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.238269e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.055666e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.238246e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.238246e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.378819 sec +TOTAL : 6.381413 sec INFO: No Floating Point Exceptions have been reported - 18,256,936,501 cycles # 2.860 GHz - 43,967,132,932 instructions # 2.41 insn per cycle - 6.384352391 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 18,267,573,257 cycles # 2.861 GHz + 43,966,026,516 instructions # 2.41 insn per cycle + 6.386884750 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.561076e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.041870e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.041870e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.556382e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.037173e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.037173e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.456789 sec +TOTAL : 4.468246 sec INFO: No Floating Point Exceptions have been reported - 12,749,121,543 cycles # 2.858 GHz - 30,998,815,432 instructions # 2.43 insn per cycle - 4.462190413 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) + 12,780,247,489 cycles # 2.858 GHz + 30,998,946,765 instructions # 2.43 insn per cycle + 4.473714210 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.929635e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.687321e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.687321e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.920501e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.673029e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.673029e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.680933 sec +TOTAL : 3.698478 sec INFO: No Floating Point Exceptions have been reported - 10,043,859,163 cycles # 2.725 GHz - 19,365,869,375 instructions # 1.93 insn per cycle - 3.686641064 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) + 10,077,708,073 cycles # 2.723 GHz + 19,366,955,499 instructions # 1.92 insn per cycle + 3.704491612 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.003881e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.819685e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.819685e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.006263e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.826924e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.826924e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.556980 sec +TOTAL : 3.554344 sec INFO: No Floating Point Exceptions have been reported - 9,707,455,839 cycles # 2.726 GHz - 18,987,230,579 instructions # 1.96 insn per cycle - 3.562346845 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) + 9,708,097,650 cycles # 2.728 GHz + 18,987,540,468 instructions # 1.96 insn per cycle + 3.559725957 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.666494e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.194230e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.194230e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.673906e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.201263e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.201263e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.197918 sec +TOTAL : 4.180852 sec INFO: No Floating Point Exceptions have been reported - 8,600,797,083 cycles # 2.047 GHz - 15,729,794,856 instructions # 1.83 insn per cycle - 4.203466995 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) + 8,572,399,884 cycles # 2.048 GHz + 15,727,509,673 instructions # 1.83 insn per cycle + 4.186463799 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 19a199da15..8320028620 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -40,177 +40,177 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:23:15 +DATE: 2024-05-16_15:05:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.839913e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.559208e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.040532e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.845001e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.545626e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.012755e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.943278 sec +TOTAL : 1.945484 sec INFO: No Floating Point Exceptions have been reported - 6,181,845,855 cycles # 2.840 GHz - 11,415,068,692 instructions # 1.85 insn per cycle - 2.232241621 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst + 6,182,952,030 cycles # 2.840 GHz + 11,472,065,600 instructions # 1.86 insn per cycle + 2.234065267 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.053691e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.235890e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.235890e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.053191e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.234940e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234940e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.391359 sec +TOTAL : 6.394281 sec INFO: No Floating Point Exceptions have been reported - 18,284,615,829 cycles # 2.859 GHz - 43,966,934,303 instructions # 2.40 insn per cycle - 6.396729569 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 18,294,474,856 cycles # 2.859 GHz + 43,971,000,114 instructions # 2.40 insn per cycle + 6.399562206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.548538e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.027318e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.027318e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.557791e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.040989e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.040989e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.492815 sec +TOTAL : 4.463787 sec INFO: No Floating Point Exceptions have been reported - 12,799,194,167 cycles # 2.850 GHz - 31,002,998,440 instructions # 2.42 insn per cycle - 4.498423848 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) + 12,772,778,524 cycles # 2.859 GHz + 30,998,712,334 instructions # 2.43 insn per cycle + 4.469196075 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.927818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.684883e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.684883e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.925684e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.680642e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.680642e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.684126 sec +TOTAL : 3.688587 sec INFO: No Floating Point Exceptions have been reported - 10,059,075,308 cycles # 2.727 GHz - 19,365,321,698 instructions # 1.93 insn per cycle - 3.689518031 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) + 10,072,112,495 cycles # 2.727 GHz + 19,365,616,714 instructions # 1.92 insn per cycle + 3.694022814 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.005912e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.823770e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.823770e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.999182e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.825411e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.825411e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.554785 sec +TOTAL : 3.565445 sec INFO: No Floating Point Exceptions have been reported - 9,714,767,873 cycles # 2.730 GHz - 18,977,049,698 instructions # 1.95 insn per cycle - 3.560177942 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) + 9,737,532,909 cycles # 2.728 GHz + 18,976,607,709 instructions # 1.95 insn per cycle + 3.570830090 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.671731e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.199906e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.199906e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.671566e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.197159e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.197159e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.183865 sec +TOTAL : 4.185653 sec INFO: No Floating Point Exceptions have been reported - 8,579,840,171 cycles # 2.048 GHz - 15,727,472,225 instructions # 1.83 insn per cycle - 4.189226380 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) + 8,570,262,444 cycles # 2.045 GHz + 15,727,819,138 instructions # 1.84 insn per cycle + 4.191109662 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 33c7f18330..6aa3de3ecf 100644 --- 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_08:50:40 +DATE: 2024-05-16_14:32:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.810458e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.974947e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.234210e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.832072e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.951586e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.177922e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.664919 sec +TOTAL : 0.666506 sec INFO: No Floating Point Exceptions have been reported - 2,531,875,227 cycles # 2.826 GHz - 3,924,651,160 instructions # 1.55 insn per cycle - 0.957718090 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/gcheck.exe 
-p 2048 256 1 + 2,570,661,306 cycles # 2.841 GHz + 3,994,547,928 instructions # 1.55 insn per cycle + 0.967516454 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.106178e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.308715e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.308715e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.115974e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322257e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322257e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.107328 sec +TOTAL : 6.054676 sec INFO: No Floating Point Exceptions have been reported - 17,473,849,627 cycles # 2.859 GHz - 41,813,289,157 instructions # 2.39 insn per cycle - 6.112590469 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) + 17,515,565,744 cycles # 2.891 GHz + 41,813,477,100 instructions # 2.39 insn per cycle + 6.059803806 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.602402e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.113473e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.113473e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.620797e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.138518e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.138518e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.350705 sec +TOTAL : 4.301844 sec INFO: No Floating Point Exceptions have been reported - 12,445,011,688 cycles # 2.858 GHz - 30,161,192,299 instructions # 2.42 insn per cycle - 4.355874746 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1612) (avx2: 0) (512y: 0) (512z: 0) + 12,450,766,554 cycles # 2.891 GHz + 30,161,114,565 instructions # 2.42 insn per cycle + 4.307292943 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1612) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.933515e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.700181e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.700181e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.953305e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.731201e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 2.731201e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.673811 sec +TOTAL : 3.641578 sec INFO: No Floating Point Exceptions have been reported - 9,974,481,462 cycles # 2.712 GHz - 19,097,432,832 instructions # 1.91 insn per cycle - 3.679029945 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1931) (512y: 0) (512z: 0) + 9,958,194,708 cycles # 2.732 GHz + 19,097,340,022 instructions # 1.92 insn per cycle + 3.646748223 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1931) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.017924e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.864367e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.864367e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.030144e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.871070e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.871070e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.534233 sec +TOTAL : 3.516336 sec INFO: No Floating Point Exceptions have been reported - 9,647,224,526 cycles # 2.726 GHz - 18,756,987,578 instructions # 1.94 insn per cycle - 3.539409278 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1662) (512y: 178) (512z: 0) + 9,615,329,857 cycles # 2.731 GHz + 18,757,197,169 instructions # 1.95 insn per cycle + 3.521486960 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1662) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.715481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.272578e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.272578e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.716452e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.276447e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 2.276447e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.089098 sec +TOTAL : 4.085042 sec INFO: No Floating Point Exceptions have been reported - 8,410,617,370 cycles # 2.055 GHz - 15,604,171,588 instructions # 1.86 insn per cycle - 4.094301669 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 887) (512y: 156) (512z: 1239) + 8,419,965,935 cycles # 2.059 GHz + 15,604,092,420 instructions # 1.85 insn per cycle + 4.090208290 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 887) (512y: 156) (512z: 1239) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 33fed0551a..b8b45776b1 100644 --- 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:08:08 +DATE: 2024-05-16_14:50:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.628819e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.711705e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.169619e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.702175e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.710921e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.156854e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.687102 sec +TOTAL : 0.694390 sec INFO: No Floating Point Exceptions have been reported - 2,611,517,864 cycles # 2.817 GHz - 4,097,091,457 instructions # 1.57 insn per cycle - 0.983136730 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/gcheck.exe 
-p 2048 256 1 + 2,605,646,002 cycles # 2.805 GHz + 4,054,878,448 instructions # 1.56 insn per cycle + 0.990293563 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/gcheck.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.574373e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.016662e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.016662e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.570616e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.013848e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.013848e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.417758 sec +TOTAL : 4.427060 sec INFO: No Floating Point Exceptions have been reported - 12,636,637,133 cycles # 2.858 GHz - 32,511,207,265 instructions # 2.57 insn per cycle - 4.423102265 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) + 12,653,335,495 cycles # 2.855 GHz + 32,508,582,789 instructions # 2.57 insn per cycle + 4.432506316 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.003415e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.868406e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.868406e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.997942e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.867798e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.867798e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.564130 sec +TOTAL : 3.572733 sec INFO: No Floating Point Exceptions have been reported - 10,200,579,028 cycles # 2.858 GHz - 24,474,078,033 instructions # 2.40 insn per cycle - 3.569608882 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) + 10,224,022,422 cycles # 2.858 GHz + 24,474,305,392 instructions # 2.39 insn per cycle + 3.578147466 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.159366e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.171580e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.171580e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.166030e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.174429e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 3.174429e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.334345 sec +TOTAL : 3.325864 sec INFO: No Floating Point Exceptions have been reported - 9,137,150,683 cycles # 2.737 GHz - 16,925,063,810 instructions # 1.85 insn per cycle - 3.339844561 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) + 9,098,194,590 cycles # 2.732 GHz + 16,922,780,551 instructions # 1.86 insn per cycle + 3.331363940 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.217918e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.276168e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.276168e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.212975e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.265291e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.265291e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.257138 sec +TOTAL : 3.263243 sec INFO: No Floating Point Exceptions have been reported - 8,913,058,262 cycles # 2.733 GHz - 16,333,083,963 instructions # 1.83 insn per cycle - 3.262669724 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) + 8,899,581,855 cycles # 2.723 GHz + 16,332,700,862 instructions # 1.84 insn per cycle + 3.268811314 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.841334e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.527766e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.527766e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.881513e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.573087e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 2.573087e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.844554 sec +TOTAL : 3.766130 sec INFO: No Floating Point Exceptions have been reported - 7,947,309,723 cycles # 2.070 GHz - 14,588,089,470 instructions # 1.84 insn per cycle - 3.850353922 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) + 7,870,004,063 cycles # 2.087 GHz + 14,582,523,760 instructions # 1.85 insn per cycle + 3.771527980 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 40e4c2710d..36ca3a055a 100644 --- 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:08:35 +DATE: 2024-05-16_14:50:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.672056e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.715961e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.200378e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.703728e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.728666e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.213805e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.688059 sec +TOTAL : 0.683308 sec INFO: No Floating Point Exceptions have been reported - 2,628,811,652 cycles # 2.824 GHz - 4,098,061,770 instructions # 1.56 insn per cycle - 0.989964852 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/gcheck.exe 
-p 2048 256 1 + 2,600,218,993 cycles # 2.827 GHz + 4,020,842,023 instructions # 1.55 insn per cycle + 0.979103636 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/gcheck.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.088773e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.951304e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.951304e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.089739e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.950650e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.950650e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.432390 sec +TOTAL : 3.429960 sec INFO: No Floating Point Exceptions have been reported - 9,816,761,508 cycles # 2.856 GHz - 25,389,812,172 instructions # 2.59 insn per cycle - 3.437900655 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) + 9,811,818,087 cycles # 2.857 GHz + 25,388,363,151 instructions # 2.59 insn per cycle + 3.435408237 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.323639e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.570997e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.570997e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.308707e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.576242e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.576242e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.131480 sec +TOTAL : 3.150608 sec INFO: No Floating Point Exceptions have been reported - 8,960,455,508 cycles # 2.857 GHz - 21,483,342,086 instructions # 2.40 insn per cycle - 3.137065961 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) + 9,017,040,298 cycles # 2.858 GHz + 21,483,572,468 instructions # 2.38 insn per cycle + 3.156151233 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.323393e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.523515e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.523515e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.317514e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.502934e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 3.502934e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.129809 sec +TOTAL : 3.137476 sec INFO: No Floating Point Exceptions have been reported - 8,590,805,157 cycles # 2.741 GHz - 15,810,902,744 instructions # 1.84 insn per cycle - 3.135401923 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) + 8,591,075,632 cycles # 2.734 GHz + 15,811,134,800 instructions # 1.84 insn per cycle + 3.143116597 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.371590e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.617449e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.617449e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.364262e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.604788e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.604788e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.072734 sec +TOTAL : 3.081738 sec INFO: No Floating Point Exceptions have been reported - 8,426,985,572 cycles # 2.738 GHz - 15,515,147,883 instructions # 1.84 insn per cycle - 3.078306295 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) + 8,442,051,612 cycles # 2.735 GHz + 15,504,513,991 instructions # 1.84 insn per cycle + 3.087247738 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.994914e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.793395e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.793395e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.995819e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.803385e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 2.803385e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.576498 sec +TOTAL : 3.575599 sec INFO: No Floating Point Exceptions have been reported - 7,555,837,029 cycles # 2.111 GHz - 14,284,009,962 instructions # 1.89 insn per cycle - 3.582086451 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) + 7,560,717,738 cycles # 2.112 GHz + 14,283,918,013 instructions # 1.89 insn per cycle + 3.581217674 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 8013a0a990..e6a48e18ea 100644 --- 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_08:51:10 +DATE: 2024-05-16_14:33:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.595984e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.320772e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.301811e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.602023e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.319974e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.288580e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.570737 sec +TOTAL : 0.573413 sec INFO: No Floating Point Exceptions have been reported - 2,246,486,544 cycles # 2.823 GHz - 3,506,634,931 instructions # 1.56 insn per cycle - 0.852338149 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe 
-p 2048 256 1 + 2,248,961,780 cycles # 2.823 GHz + 3,510,545,687 instructions # 1.56 insn per cycle + 0.854969378 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.083847e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.285456e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.285456e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.082006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.283429e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.283429e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.179178 sec +TOTAL : 6.191643 sec INFO: No Floating Point Exceptions have been reported - 17,746,031,356 cycles # 2.870 GHz - 43,508,011,437 instructions # 2.45 insn per cycle - 6.184344065 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 17,740,084,916 cycles # 2.864 GHz + 43,510,870,904 instructions # 2.45 insn per cycle + 6.196630799 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.238662e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.428787e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.428787e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.223628e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.400190e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.400190e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.189594 sec +TOTAL : 3.211334 sec INFO: No Floating Point Exceptions have been reported - 9,235,498,127 cycles # 2.892 GHz - 21,907,480,657 instructions # 2.37 insn per cycle - 3.194460576 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,244,093,548 cycles # 2.875 GHz + 21,907,620,538 instructions # 2.37 insn per cycle + 3.216400901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.409366e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.687254e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.687254e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.419643e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.716778e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 3.716778e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.980854 sec +TOTAL : 2.967957 sec INFO: No Floating Point Exceptions have been reported - 8,292,293,761 cycles # 2.778 GHz - 15,591,054,832 instructions # 1.88 insn per cycle - 2.986051095 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) + 8,316,472,651 cycles # 2.798 GHz + 15,592,546,873 instructions # 1.87 insn per cycle + 2.972957823 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.428922e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.741361e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.741361e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.415944e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.716005e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716005e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.960614 sec +TOTAL : 2.973842 sec INFO: No Floating Point Exceptions have been reported - 8,224,129,141 cycles # 2.775 GHz - 15,429,529,767 instructions # 1.88 insn per cycle - 2.965545528 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) + 8,315,082,395 cycles # 2.793 GHz + 15,436,266,122 instructions # 1.86 insn per cycle + 2.978955673 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.377921e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.593574e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.593574e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.416813e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.677411e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 3.677411e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.020695 sec +TOTAL : 2.974949 sec INFO: No Floating Point Exceptions have been reported - 6,668,305,047 cycles # 2.205 GHz - 12,864,023,469 instructions # 1.93 insn per cycle - 3.025878438 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 6,610,937,423 cycles # 2.219 GHz + 12,863,752,208 instructions # 1.95 insn per cycle + 2.980091060 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 3c9816664d..29c604a610 100644 --- 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:18:13 +DATE: 2024-05-16_15:00:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.924255e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.413592e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.413592e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.941162e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.420865e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.420865e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.746384 sec +TOTAL : 1.745821 sec INFO: No Floating Point Exceptions have been reported - 5,610,656,254 cycles # 2.840 GHz - 10,225,693,233 instructions # 1.82 insn per cycle - 2.033039131 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge + 5,608,708,868 cycles # 2.838 GHz + 10,190,752,473 instructions # 1.82 insn per cycle + 2.033892232 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -70,19 +70,19 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -90,28 +90,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.059133e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.253480e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.253480e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.055357e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.251163e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.251163e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.420919 sec +TOTAL : 6.442554 sec INFO: No Floating Point Exceptions have been reported - 18,381,343,915 cycles # 2.860 GHz - 43,656,614,838 instructions # 2.38 insn per cycle - 6.427111291 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 18,431,262,224 cycles # 2.859 GHz + 43,659,496,470 instructions # 2.37 insn per cycle + 6.448903506 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -119,28 +119,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.109916e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.152195e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.152195e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.110619e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.160777e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.160777e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.495245 sec +TOTAL : 3.492633 sec INFO: No Floating Point Exceptions have been reported - 9,996,777,274 cycles # 2.856 GHz - 23,243,853,177 instructions # 2.33 insn per cycle - 3.501517806 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,993,500,583 cycles # 2.857 GHz + 23,243,476,984 instructions # 2.33 insn per cycle + 3.498991107 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -148,28 +148,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.267470e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.372677e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.372677e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.275264e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.392372e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.392372e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.282249 sec +TOTAL : 3.272099 sec INFO: No Floating Point Exceptions have been reported - 8,999,301,502 cycles # 2.737 GHz - 16,711,367,029 instructions # 1.86 insn per cycle - 3.288600293 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) + 9,005,707,266 cycles # 2.748 GHz + 16,711,349,389 instructions # 1.86 insn per cycle + 3.278668519 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -177,28 +177,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.300798e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.453366e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.453366e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.297831e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.443567e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.443567e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.237585 sec +TOTAL : 3.244587 sec INFO: No Floating Point Exceptions have been reported - 8,910,454,271 cycles # 2.748 GHz - 16,548,951,381 instructions # 1.86 insn per cycle - 3.243990670 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) + 8,928,752,660 cycles # 2.747 GHz + 16,549,135,089 instructions # 1.85 insn per cycle + 3.250993607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -206,22 +206,22 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.242765e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.310475e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.310475e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.241559e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.306249e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.306249e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.319641 sec +TOTAL : 3.324488 sec INFO: No Floating Point Exceptions have been reported - 7,375,675,607 cycles # 2.218 GHz - 14,070,638,355 instructions # 1.91 insn per cycle - 3.325979475 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 7,378,511,382 cycles # 2.216 GHz + 14,071,008,703 instructions # 1.91 insn per cycle + 3.330848983 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 86b18beeca..8016aaf3c8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:29:16 +DATE: 2024-05-16_15:11:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.316146e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.184478e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.273286e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.314779e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.179276e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.254245e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.208576 sec +TOTAL : 1.213916 sec INFO: No Floating Point Exceptions have been reported - 4,060,487,484 cycles # 2.838 GHz - 6,545,680,426 instructions # 1.61 insn per cycle - 1.488008057 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common + 4,089,914,869 cycles # 2.847 GHz + 6,594,462,327 instructions # 1.61 insn per cycle + 1.494122889 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.079034e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.280414e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.280414e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.089350e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.292539e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.292539e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.551180 sec +TOTAL : 6.490729 sec INFO: No Floating Point Exceptions have been reported - 18,753,021,194 cycles # 2.861 GHz - 43,690,910,218 instructions # 2.33 insn per cycle - 6.556344298 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 18,740,706,935 cycles # 2.886 GHz + 43,689,321,367 instructions # 2.33 insn per cycle + 6.495941000 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ 
PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.211903e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.382753e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.382753e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.213519e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.407340e+06 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.407340e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.572564 sec +TOTAL : 3.565494 sec INFO: No Floating Point Exceptions have been reported - 10,230,948,768 cycles # 2.860 GHz - 21,990,392,157 instructions # 2.15 insn per cycle - 3.577833075 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 10,288,737,724 cycles # 2.883 GHz + 21,988,558,280 instructions # 2.14 insn per cycle + 3.570732391 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.375026e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.621400e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.621400e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.392103e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.656361e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.656361e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.365598 sec +TOTAL : 3.347045 sec INFO: No Floating Point Exceptions have been reported - 9,260,613,801 cycles # 2.748 GHz - 15,502,462,040 instructions # 1.67 insn per cycle - 3.370856404 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) + 9,294,224,919 cycles # 2.774 GHz + 15,502,535,760 instructions # 1.67 insn per cycle + 3.352354405 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.399053e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.676729e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.676729e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.411853e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.706271e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.706271e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.348363 sec +TOTAL : 3.334245 sec INFO: No Floating Point Exceptions have been reported - 9,213,003,046 cycles # 2.748 GHz - 15,140,470,998 instructions # 1.64 insn per cycle - 3.353630138 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) + 9,229,877,586 cycles # 2.765 GHz + 15,144,508,612 instructions # 1.64 insn per cycle + 3.339505215 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.357732e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.568234e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.568234e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.389642e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.623022e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.623022e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.398550 sec +TOTAL : 3.358884 sec INFO: No Floating Point Exceptions have been reported - 7,618,469,719 cycles # 2.239 GHz - 12,576,308,405 instructions # 1.65 insn per cycle - 3.403870283 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 7,623,474,420 cycles # 2.266 GHz + 12,573,351,599 instructions # 1.65 insn per cycle + 3.364654068 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 1737a62fd3..3bd2ee01ac 100644 --- 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:26:31 +DATE: 2024-05-16_15:08:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.318359e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.192916e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.284003e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.323461e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.185145e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.269757e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.876266 sec +TOTAL : 0.877821 sec INFO: No Floating Point Exceptions have been reported - 3,110,258,101 cycles # 2.830 GHz - 6,386,329,379 instructions # 2.05 insn per cycle - 1.155797878 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst + 3,113,911,295 cycles # 2.829 GHz + 6,352,740,713 instructions # 2.04 insn per cycle + 1.157340966 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.080174e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.280907e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.280907e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.080933e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.281840e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.281840e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.203279 sec +TOTAL : 6.198018 sec INFO: No Floating Point Exceptions have been reported - 17,751,642,679 cycles # 2.860 GHz - 43,511,106,202 instructions # 2.45 insn per cycle - 6.208474005 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 17,744,200,571 cycles # 2.861 GHz + 43,507,633,337 instructions # 2.45 insn per cycle + 6.203254296 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` 
= 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.214412e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.383137e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.383137e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.206413e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.379725e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.379725e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.225748 sec +TOTAL : 3.236598 sec INFO: No Floating Point Exceptions have been reported - 9,232,921,103 cycles # 2.858 GHz - 21,907,138,182 instructions # 2.37 insn per cycle - 3.231059357 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,264,626,353 cycles # 2.859 GHz + 21,909,129,569 instructions # 2.36 insn per cycle + 3.241826343 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.380021e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.627341e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.627341e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.378945e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.622195e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.622195e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.019963 sec +TOTAL : 3.023073 sec INFO: No Floating Point Exceptions have been reported - 8,287,021,575 cycles # 2.740 GHz - 15,592,507,037 instructions # 1.88 insn per cycle - 3.025235012 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) + 8,285,944,372 cycles # 2.737 GHz + 15,591,046,995 instructions # 1.88 insn per cycle + 3.028351636 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.401224e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.676523e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.676523e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.406944e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.689990e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.689990e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.998514 sec +TOTAL : 2.991499 sec INFO: No Floating Point Exceptions have been reported - 8,210,791,486 cycles # 2.734 GHz - 15,428,751,418 instructions # 1.88 insn per cycle - 3.003711640 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) + 8,215,974,247 cycles # 2.742 GHz + 15,434,394,808 instructions # 1.88 insn per cycle + 2.996736921 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.356959e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.566697e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.566697e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.365448e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.582442e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.582442e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.049340 sec +TOTAL : 3.039216 sec INFO: No Floating Point Exceptions have been reported - 6,623,797,614 cycles # 2.170 GHz - 12,865,182,847 instructions # 1.94 insn per cycle - 3.054557683 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 6,609,003,865 cycles # 2.172 GHz + 12,863,939,056 instructions # 1.95 insn per cycle + 3.044504803 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index 0b2b54a7e3..ef6806658f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -40,177 +40,177 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:23:47 +DATE: 2024-05-16_15:06:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.759670e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.145239e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.143418e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.717298e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.145539e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143407e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.524875 sec +TOTAL : 1.533112 sec INFO: No Floating Point Exceptions have been reported - 4,989,316,542 cycles # 2.839 GHz - 9,082,846,394 instructions # 1.82 insn per cycle - 1.813458953 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst + 4,980,418,158 cycles # 2.839 GHz + 9,119,342,139 instructions # 1.83 insn per cycle + 1.812784805 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.080048e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.281576e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.281576e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.069115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.265540e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.265540e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.203506 sec +TOTAL : 6.262605 sec INFO: No Floating Point Exceptions have been reported - 17,757,530,244 cycles # 2.861 GHz - 43,508,006,160 instructions # 2.45 insn per cycle - 6.208727967 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 17,921,464,120 cycles # 2.860 GHz + 43,508,155,770 instructions # 2.43 insn per cycle + 6.267871711 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.206812e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.375863e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.375863e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.205993e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.384022e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.384022e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.237434 sec +TOTAL : 3.237394 sec INFO: No Floating Point Exceptions have been reported - 9,259,434,414 cycles # 2.856 GHz - 21,908,038,508 instructions # 2.37 insn per cycle - 3.242611568 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,270,351,681 cycles # 2.860 GHz + 21,907,147,046 instructions # 2.36 insn per cycle + 3.242634203 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe 
-p 2048 256 12 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.382009e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.633872e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.633872e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.379256e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.625966e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.625966e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.016863 sec +TOTAL : 3.019248 sec INFO: No Floating Point Exceptions have been reported - 8,280,779,950 cycles # 2.741 GHz - 15,591,245,776 instructions # 1.88 insn per cycle - 3.022171201 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) + 8,297,873,717 cycles # 2.744 GHz + 15,590,905,283 instructions # 1.88 insn per cycle + 3.024598202 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.404263e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.681230e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.681230e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.398987e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.671948e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.671948e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.994546 sec +TOTAL : 3.000824 sec INFO: No Floating Point Exceptions have been reported - 8,214,677,647 cycles # 2.739 GHz - 15,434,585,139 instructions # 1.88 insn per cycle - 2.999808256 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) + 8,223,361,227 cycles # 2.736 GHz + 15,434,590,756 instructions # 1.88 insn per cycle + 3.006023707 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.362407e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.574940e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.574940e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.360241e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.570739e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.570739e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.043437 sec +TOTAL : 3.046199 sec INFO: No Floating Point Exceptions have been reported - 6,615,257,438 cycles # 2.171 GHz - 12,865,082,178 instructions # 1.94 insn per cycle - 3.048740210 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 6,615,724,908 cycles # 2.169 GHz + 12,863,710,849 instructions # 1.94 insn per cycle + 3.051492012 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 995450060c..b613786442 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_08:51:36 +DATE: 2024-05-16_14:33:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.597215e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.332061e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.336412e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.604585e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.336536e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.343718e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.570131 sec +TOTAL : 0.571057 sec INFO: No Floating Point Exceptions have been reported - 2,262,800,220 cycles # 2.852 GHz - 3,529,650,865 instructions # 1.56 insn per cycle - 0.850224222 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,242,744,669 cycles # 2.822 GHz + 3,531,920,926 instructions # 1.57 insn per cycle + 0.851832101 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 
1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.165199e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.400279e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.400279e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.153085e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.384746e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.384746e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.768753 sec +TOTAL : 5.830325 sec INFO: No Floating Point Exceptions have been reported - 16,702,300,100 cycles # 2.893 GHz - 41,266,069,530 instructions # 2.47 insn per cycle - 5.773788428 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) + 16,691,813,815 cycles # 2.861 GHz + 41,266,181,474 instructions # 2.47 insn per cycle + 5.835359179 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.303137e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.575063e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.575063e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.304367e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.587722e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 3.587722e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.111032 sec +TOTAL : 3.109672 sec INFO: No Floating Point Exceptions have been reported - 9,008,169,469 cycles # 2.892 GHz - 21,210,986,670 instructions # 2.35 insn per cycle - 3.116090380 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1843) (avx2: 0) (512y: 0) (512z: 0) + 8,995,426,679 cycles # 2.889 GHz + 21,211,089,826 instructions # 2.36 insn per cycle + 3.114839321 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1843) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.392977e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.662301e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.662301e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.420820e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.716893e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716893e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.001977 sec +TOTAL : 2.969606 sec INFO: No Floating Point Exceptions have been reported - 8,314,602,138 cycles # 2.766 GHz - 15,425,291,626 instructions # 1.86 insn per cycle - 3.007014639 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2537) (512y: 0) (512z: 0) + 8,272,952,138 cycles # 2.782 GHz + 15,425,102,157 instructions # 1.86 insn per cycle + 2.974640700 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2537) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.476383e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.842526e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.842526e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.475869e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.854556e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 3.854556e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.911880 sec +TOTAL : 2.912175 sec INFO: No Floating Point Exceptions have been reported - 8,105,425,569 cycles # 2.780 GHz - 15,232,647,489 instructions # 1.88 insn per cycle - 2.916884995 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) + 8,117,590,540 cycles # 2.783 GHz + 15,233,342,033 instructions # 1.88 insn per cycle + 2.917189383 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.416684e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.676680e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.676680e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.412788e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.668874e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.668874e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.977366 sec +TOTAL : 2.982256 sec INFO: No Floating Point Exceptions have been reported - 6,591,431,955 cycles # 2.211 GHz - 12,844,185,156 instructions # 1.95 insn per cycle - 2.982248175 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1706) (512y: 18) (512z: 1427) + 6,592,409,084 cycles # 2.208 GHz + 12,843,659,599 instructions # 1.95 insn per cycle + 2.987368722 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1706) (512y: 18) (512z: 1427) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052564145764E-002 Relative difference = 1.9988585667912256e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 0b9b4485ca..e6d46e5965 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:09:00 +DATE: 2024-05-16_14:51:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.300754e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.188837e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.289601e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.307514e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.195447e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.293637e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.584304 sec +TOTAL : 0.586214 sec INFO: No Floating Point Exceptions have been reported - 2,283,875,556 cycles # 2.814 GHz - 3,582,969,727 instructions # 1.57 insn per cycle - 0.868214226 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/gcheck.exe -p 2048 256 1 + 2,288,759,129 cycles # 2.822 GHz + 3,563,945,826 instructions # 1.56 insn per cycle + 0.869586754 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 
1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.599189e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.083386e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.083386e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.594420e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.075669e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.075669e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 4.311622 sec +TOTAL : 4.325231 sec INFO: No Floating Point Exceptions have been reported - 12,169,175,832 cycles # 2.820 GHz - 32,427,435,629 instructions # 2.66 insn per cycle - 4.316983262 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) + 12,164,411,956 cycles # 2.810 GHz + 32,427,707,417 instructions # 2.67 insn per cycle + 4.330470336 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039840314887E-002 Relative difference = 1.244813035273009e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.616768e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.420514e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.420514e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.607791e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.429113e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 4.429113e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.789822 sec +TOTAL : 2.796823 sec INFO: No Floating Point Exceptions have been reported - 7,986,693,869 cycles # 2.858 GHz - 18,657,951,154 instructions # 2.34 insn per cycle - 2.795311494 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1555) (avx2: 0) (512y: 0) (512z: 0) + 8,009,111,157 cycles # 2.859 GHz + 18,657,618,704 instructions # 2.33 insn per cycle + 2.802139139 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1555) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039283704129E-002 Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.713667e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.460081e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.460081e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.719759e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.472647e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.472647e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.696640 sec +TOTAL : 2.690741 sec INFO: No Floating Point Exceptions have been reported - 7,429,692,675 cycles # 2.750 GHz - 14,251,991,061 instructions # 1.92 insn per cycle - 2.702185959 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) + 7,416,864,109 cycles # 2.752 GHz + 14,251,974,045 instructions # 1.92 insn per cycle + 2.696083346 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053244447801E-002 Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.769386e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.614912e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.614912e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.774877e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.630440e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 4.630440e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.649318 sec +TOTAL : 2.645076 sec INFO: No Floating Point Exceptions have been reported - 7,300,524,783 cycles # 2.751 GHz - 13,948,352,873 instructions # 1.91 insn per cycle - 2.654703070 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) + 7,291,130,406 cycles # 2.752 GHz + 13,948,384,567 instructions # 1.91 insn per cycle + 2.650598467 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053244447801E-002 Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.423538e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.721341e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.721341e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.434115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.741003e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.741003e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.973336 sec +TOTAL : 2.962177 sec INFO: No Floating Point Exceptions have been reported - 6,497,241,633 cycles # 2.182 GHz - 13,423,441,698 instructions # 2.07 insn per cycle - 2.978622035 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) + 6,479,327,720 cycles # 2.184 GHz + 13,423,401,797 instructions # 2.07 insn per cycle + 2.967420151 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052562326775E-002 Relative difference = 1.997440588685788e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 3908c86ece..1851f3246c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_09:09:24 +DATE: 2024-05-16_14:51:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.310423e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204445e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.337304e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.308899e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.200904e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.329787e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.584795 sec +TOTAL : 0.585570 sec INFO: No Floating Point Exceptions have been reported - 2,287,699,290 cycles # 2.820 GHz - 3,599,028,569 instructions # 1.57 insn per cycle - 0.868441535 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/gcheck.exe -p 2048 256 1 + 2,293,480,451 cycles # 2.820 GHz + 3,552,176,680 instructions # 1.55 insn per cycle + 0.870100804 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 
1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.136531e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.094682e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.094682e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.129885e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.080551e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.080551e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.319757 sec +TOTAL : 3.330346 sec INFO: No Floating Point Exceptions have been reported - 9,434,042,205 cycles # 2.838 GHz - 25,263,365,170 instructions # 2.68 insn per cycle - 3.325108580 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) + 9,425,530,261 cycles # 2.826 GHz + 25,263,309,757 instructions # 2.68 insn per cycle + 3.335509619 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039838495897E-002 Relative difference = 1.2589928273811243e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.951052e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.482214e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.482214e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.953227e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.493970e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 5.493970e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.517959 sec +TOTAL : 2.514743 sec INFO: No Floating Point Exceptions have been reported - 7,205,603,839 cycles # 2.856 GHz - 16,869,214,278 instructions # 2.34 insn per cycle - 2.523300575 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) + 7,199,101,915 cycles # 2.858 GHz + 16,870,111,415 instructions # 2.34 insn per cycle + 2.520226033 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.817378e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.753095e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.753095e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.869871e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.903620e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.903620e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.609243 sec +TOTAL : 2.568153 sec INFO: No Floating Point Exceptions have been reported - 7,162,444,501 cycles # 2.741 GHz - 13,618,285,483 instructions # 1.90 insn per cycle - 2.614554681 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) + 7,089,400,745 cycles # 2.756 GHz + 13,616,924,187 instructions # 1.92 insn per cycle + 2.573571442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053220800939E-002 Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.905079e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.003899e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.003899e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.911116e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.034757e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 5.034757e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.541524 sec +TOTAL : 2.534380 sec INFO: No Floating Point Exceptions have been reported - 7,015,769,981 cycles # 2.756 GHz - 13,432,482,610 instructions # 1.91 insn per cycle - 2.547061446 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) + 7,042,060,221 cycles # 2.774 GHz + 13,426,671,587 instructions # 1.91 insn per cycle + 2.539847169 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053220800939E-002 Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.524560e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.960147e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.960147e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.525985e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.967300e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.967300e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.872532 sec +TOTAL : 2.871748 sec INFO: No Floating Point Exceptions have been reported - 6,324,405,863 cycles # 2.198 GHz - 13,154,939,543 instructions # 2.08 insn per cycle - 2.877932546 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) + 6,325,625,286 cycles # 2.199 GHz + 13,154,721,049 instructions # 2.08 insn per cycle + 2.877120825 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052536860923E-002 Relative difference = 1.977588895209662e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index cea92017f4..b626a014f8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_08:52:02 +DATE: 2024-05-16_14:34:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.804910e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.941947e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.165172e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.830025e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.944832e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.160865e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.666038 sec +TOTAL : 0.662041 sec INFO: No Floating Point Exceptions have been reported - 2,590,589,949 cycles # 2.854 GHz - 3,955,758,741 instructions # 1.53 insn per cycle - 0.976084714 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,559,219,510 cycles # 2.861 GHz + 3,969,506,530 instructions # 1.55 insn per cycle + 0.952802853 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 
1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039901590279E-002 Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.043340e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.219809e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.219809e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.044346e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.221236e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.221236e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.446471 sec +TOTAL : 6.439706 sec INFO: No Floating Point Exceptions have been reported - 18,657,215,844 cycles # 2.893 GHz - 44,222,139,338 instructions # 2.37 insn per cycle - 6.451661658 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) + 18,648,827,254 cycles # 2.894 GHz + 44,218,351,924 instructions # 2.37 insn per cycle + 6.444755062 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.637106e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.164315e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.164315e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.634240e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.158489e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 2.158489e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.261574 sec +TOTAL : 4.270020 sec INFO: No Floating Point Exceptions have been reported - 12,336,601,747 cycles # 2.892 GHz - 30,918,015,015 instructions # 2.51 insn per cycle - 4.266662636 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) + 12,337,216,169 cycles # 2.886 GHz + 30,918,100,190 instructions # 2.51 insn per cycle + 4.275170664 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.948140e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.709136e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.709136e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.943703e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.696046e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.696046e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.645911 sec +TOTAL : 3.651391 sec INFO: No Floating Point Exceptions have been reported - 10,085,902,604 cycles # 2.763 GHz - 19,374,141,938 instructions # 1.92 insn per cycle - 3.651090079 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2130) (512y: 0) (512z: 0) + 10,097,284,751 cycles # 2.762 GHz + 19,374,074,587 instructions # 1.92 insn per cycle + 3.656592402 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2130) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.016172e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.836535e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.836535e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.039225e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.880994e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 2.880994e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.534019 sec +TOTAL : 3.498933 sec INFO: No Floating Point Exceptions have been reported - 9,731,602,864 cycles # 2.751 GHz - 18,955,422,118 instructions # 1.95 insn per cycle - 3.539225360 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1860) (512y: 188) (512z: 0) + 9,699,890,764 cycles # 2.769 GHz + 18,944,296,026 instructions # 1.95 insn per cycle + 3.504313379 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1860) (512y: 188) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.768535e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.361876e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.361876e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.766168e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.359450e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359450e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.973258 sec +TOTAL : 3.977432 sec INFO: No Floating Point Exceptions have been reported - 8,365,805,581 cycles # 2.103 GHz - 15,056,311,694 instructions # 1.80 insn per cycle - 3.978388965 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1024) (512y: 155) (512z: 1316) + 8,362,626,878 cycles # 2.101 GHz + 15,058,722,791 instructions # 1.80 insn per cycle + 3.982532855 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1024) (512y: 155) (512z: 1316) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index d024b80fdf..f9780717c1 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-15_08:52:32 +DATE: 2024-05-16_14:34:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.800084e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.931545e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.154097e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.831074e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.944999e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.163112e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.666442 sec +TOTAL : 0.663702 sec INFO: No Floating Point Exceptions have been reported - 2,566,199,127 cycles # 2.850 GHz - 3,940,059,674 instructions # 1.54 insn per cycle - 0.964208878 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,550,713,530 cycles # 2.845 GHz + 3,995,712,636 instructions # 1.57 insn per cycle + 0.958037940 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 
1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039901590279E-002 Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.080432e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.270816e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270816e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.088480e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.281697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.281697e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.239499 sec +TOTAL : 6.193535 sec INFO: No Floating Point Exceptions have been reported - 17,935,798,795 cycles # 2.873 GHz - 42,467,929,133 instructions # 2.37 insn per cycle - 6.244620252 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) + 17,967,058,694 cycles # 2.899 GHz + 42,467,805,223 instructions # 2.36 insn per cycle + 6.198684795 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.648560e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.192749e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.192749e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.676284e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.231904e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 2.231904e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.239099 sec +TOTAL : 4.170684 sec INFO: No Floating Point Exceptions have been reported - 12,133,810,960 cycles # 2.860 GHz - 30,225,049,807 instructions # 2.49 insn per cycle - 4.244104452 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) + 12,134,694,075 cycles # 2.906 GHz + 30,224,929,059 instructions # 2.49 insn per cycle + 4.175943490 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.934430e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.704468e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.704468e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.950812e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.735198e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.735198e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.672897 sec +TOTAL : 3.639278 sec INFO: No Floating Point Exceptions have been reported - 10,041,877,066 cycles # 2.731 GHz - 19,257,423,409 instructions # 1.92 insn per cycle - 3.678067586 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) + 10,078,657,444 cycles # 2.766 GHz + 19,257,126,653 instructions # 1.91 insn per cycle + 3.644365244 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.017754e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.858643e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.858643e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.049769e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.898049e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 2.898049e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.538767 sec +TOTAL : 3.481211 sec INFO: No Floating Point Exceptions have been reported - 9,651,667,831 cycles # 2.725 GHz - 18,756,471,118 instructions # 1.94 insn per cycle - 3.544018799 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1834) (512y: 191) (512z: 0) + 9,647,917,970 cycles # 2.768 GHz + 18,746,418,128 instructions # 1.94 insn per cycle + 3.486360008 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1834) (512y: 191) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.771200e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.372716e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.372716e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.796433e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.409552e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.409552e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.971695 sec +TOTAL : 3.919050 sec INFO: No Floating Point Exceptions have been reported - 8,223,244,431 cycles # 2.068 GHz - 14,980,053,623 instructions # 1.82 insn per cycle - 3.976869579 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1021) (512y: 156) (512z: 1305) + 8,244,471,456 cycles # 2.102 GHz + 14,980,246,059 instructions # 1.82 insn per cycle + 3.924194596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1021) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 6c79fc5a8a..205a4bf5b6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_08:53:02 +DATE: 2024-05-16_14:35:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.190319e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.181403e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.277211e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.201162e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181610e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277713e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.521181 sec +TOTAL : 0.525559 sec INFO: No Floating Point Exceptions have been reported - 2,125,639,390 cycles # 2.821 GHz - 3,088,425,234 instructions # 1.45 insn per cycle - 0.810012096 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,155,305,398 cycles # 2.849 GHz + 3,120,666,963 instructions # 1.45 insn per cycle + 0.814520269 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.059856e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.120844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.120844e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.068773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.129905e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129905e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.198224 sec +TOTAL : 5.176250 sec INFO: No Floating Point Exceptions have been reported - 15,010,227,772 cycles # 2.885 GHz - 38,376,294,847 instructions # 2.56 insn per cycle - 5.203572793 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 15,001,077,825 cycles # 2.896 GHz + 38,374,710,401 instructions # 2.56 insn per cycle + 5.181415080 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.494799e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.685037e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.685037e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.492980e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.684039e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.684039e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.108579 sec +TOTAL : 3.109528 sec INFO: No Floating Point Exceptions have been reported - 9,075,186,083 cycles # 2.916 GHz - 24,580,004,547 instructions # 2.71 insn per cycle - 3.113697316 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) + 9,049,547,879 cycles # 2.906 GHz + 24,578,150,431 instructions # 2.72 insn per cycle + 3.114795475 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.569480e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.052925e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.052925e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.554648e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.034559e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.034559e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.990969 sec +TOTAL : 1.996517 sec INFO: No Floating Point Exceptions have been reported - 5,446,723,124 cycles # 2.729 GHz - 11,251,102,203 instructions # 2.07 insn per cycle - 1.996371189 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) + 5,443,502,791 cycles # 2.721 GHz + 11,251,469,346 instructions # 2.07 insn per cycle + 2.001703471 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.155980e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.745118e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.745118e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.119114e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.713742e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.713742e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.810758 sec +TOTAL : 1.821745 sec INFO: No Floating Point Exceptions have been reported - 4,934,818,844 cycles # 2.719 GHz - 10,556,993,904 instructions # 2.14 insn per cycle - 1.816016703 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) + 4,960,408,882 cycles # 2.716 GHz + 10,558,806,229 instructions # 2.13 insn per cycle + 1.826903839 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.705103e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.912750e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.912750e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.693426e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.898518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.898518e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.937812 sec +TOTAL : 2.947911 sec INFO: No Floating Point Exceptions have been reported - 5,375,981,348 cycles # 1.828 GHz - 7,796,359,488 instructions # 1.45 insn per cycle - 2.943091474 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) + 5,367,244,097 cycles # 1.818 GHz + 7,793,958,391 instructions # 1.45 insn per cycle + 2.953294554 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 6a936b22f8..4b2366d44f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:18:42 +DATE: 2024-05-16_15:00:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.325218e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.872629e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.872629e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.373758e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.924060e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.924060e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.833259 sec +TOTAL : 0.825705 sec INFO: No Floating Point Exceptions have been reported - 3,043,304,670 cycles # 2.825 GHz - 4,749,213,667 instructions # 1.56 insn per cycle - 1.135924684 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge + 3,037,157,201 cycles # 2.832 GHz + 4,768,877,833 instructions # 1.57 insn per cycle + 1.128818887 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -70,19 +70,19 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -90,28 +90,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.030538e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.089970e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.089970e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.032947e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.092197e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.092197e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.351690 sec +TOTAL : 5.344113 sec INFO: No Floating Point Exceptions have been reported - 15,328,765,090 cycles # 2.861 GHz - 38,433,892,757 instructions # 2.51 insn per cycle - 5.358519712 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 15,315,317,736 cycles # 2.863 GHz + 38,433,762,310 instructions # 2.51 insn per cycle + 5.351126978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -119,28 +119,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.400584e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.586127e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.586127e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.394451e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.578816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.578816e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.272002 sec +TOTAL : 3.279010 sec INFO: No Floating Point Exceptions have been reported - 9,380,506,135 cycles # 2.862 GHz - 24,761,479,175 instructions # 2.64 insn per cycle - 3.278737013 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) + 9,390,215,737 cycles # 2.859 GHz + 24,761,602,813 instructions # 2.64 insn per cycle + 3.285914811 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -148,28 +148,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.360461e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.815939e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.815939e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.346272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.804430e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.804430e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.145107 sec +TOTAL : 2.151067 sec INFO: No Floating Point Exceptions have been reported - 5,786,206,777 cycles # 2.690 GHz - 11,538,131,025 instructions # 1.99 insn per cycle - 2.151904282 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) + 5,795,064,676 cycles # 2.687 GHz + 11,538,955,643 instructions # 1.99 insn per cycle + 2.157987463 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -177,28 +177,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.933135e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.494047e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.494047e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.949125e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.512113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.512113e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.957780 sec +TOTAL : 1.953091 sec INFO: No Floating Point Exceptions have been reported - 5,281,685,483 cycles # 2.689 GHz - 10,843,647,063 instructions # 2.05 insn per cycle - 1.964758338 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) + 5,277,608,562 cycles # 2.695 GHz + 10,845,633,589 instructions # 2.06 insn per cycle + 1.960046746 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -206,22 +206,22 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.552106e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.743946e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.743946e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.545325e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.736253e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736253e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.145788 sec +TOTAL : 3.149111 sec INFO: No Floating Point Exceptions have been reported - 5,721,034,997 cycles # 1.815 GHz - 8,037,891,027 instructions # 1.40 insn per cycle - 3.152685384 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) + 5,725,568,726 cycles # 1.815 GHz + 8,037,864,149 instructions # 1.40 insn per cycle + 3.156036160 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index e7b745031b..66fdf9efe4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:29:45 +DATE: 2024-05-16_15:11:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.590971e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.161887e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.277052e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.582142e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.158915e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274993e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.632801 sec +TOTAL : 0.629579 sec INFO: No Floating Point Exceptions have been reported - 2,472,079,288 cycles # 2.821 GHz - 3,594,328,826 instructions # 1.45 insn per cycle - 0.934637105 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common + 2,438,671,292 cycles # 2.828 GHz + 3,557,518,240 instructions # 1.46 insn per cycle + 0.918692112 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common 
==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.044246e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.104768e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.104768e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.063642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.124319e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.124319e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.300004 sec +TOTAL : 5.250994 sec INFO: No Floating Point Exceptions have been reported - 15,168,137,122 cycles # 2.860 GHz - 38,391,021,057 instructions # 2.53 insn per cycle - 5.305606427 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 15,177,224,624 cycles # 2.888 GHz + 38,389,589,114 instructions # 2.53 insn per cycle + 5.256694767 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.433972e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.620984e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.620984e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.469246e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.659787e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.659787e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.223387 sec +TOTAL : 3.192043 sec INFO: No Floating Point Exceptions have been reported - 9,242,671,203 cycles # 2.863 GHz - 24,577,390,932 instructions # 2.66 insn per cycle - 3.228925344 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) + 9,234,869,625 cycles # 2.889 GHz + 24,577,322,685 instructions # 2.66 insn per cycle + 3.197667860 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.369807e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.835324e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.835324e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.520662e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.999169e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.999169e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.125214 sec +TOTAL : 2.069942 sec INFO: No Floating Point Exceptions have been reported - 5,666,161,085 cycles # 2.661 GHz - 11,235,267,252 instructions # 1.98 insn per cycle - 2.130780371 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) + 5,642,462,557 cycles # 2.720 GHz + 11,233,692,701 instructions # 1.99 insn per cycle + 2.075542898 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.066147e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.655529e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.655529e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.151383e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.740134e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.740134e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.900125 sec +TOTAL : 1.875924 sec INFO: No Floating Point Exceptions have been reported - 5,120,318,153 cycles # 2.688 GHz - 10,506,045,544 instructions # 2.05 insn per cycle - 1.905757522 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) + 5,122,190,825 cycles # 2.724 GHz + 10,508,387,782 instructions # 2.05 insn per cycle + 1.881606947 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.596945e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.792278e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.792278e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.617306e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.815381e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.815381e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.086982 sec +TOTAL : 3.070946 sec INFO: No Floating Point Exceptions have been reported - 5,549,243,721 cycles # 1.795 GHz - 7,742,811,855 instructions # 1.40 insn per cycle - 3.092448899 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) + 5,582,158,144 cycles # 1.816 GHz + 7,742,870,902 instructions # 1.39 insn per cycle + 3.076599052 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index 118dde3b8c..82194f6fe3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:26:58 +DATE: 2024-05-16_15:09:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.590472e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.159111e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.276500e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.587611e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.161872e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276844e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.568515 sec +TOTAL : 0.568860 sec INFO: No Floating Point Exceptions have been reported - 2,258,768,488 cycles # 2.823 GHz - 3,528,957,770 instructions # 1.56 insn per cycle - 0.856852838 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst + 2,269,706,021 cycles # 2.822 GHz + 3,484,022,632 instructions # 1.54 insn per cycle + 0.860923648 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst 
==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.044700e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.105241e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.105241e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.045564e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.105865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.105865e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.237438 sec +TOTAL : 5.235402 sec INFO: No Floating Point Exceptions have been reported - 15,001,745,020 cycles # 2.862 GHz - 38,376,029,222 instructions # 2.56 insn per cycle - 5.242948759 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 14,997,948,844 cycles # 2.862 GHz + 38,373,416,469 instructions # 2.56 insn per cycle + 5.240872993 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.433268e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.620817e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.620817e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.444491e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.632712e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.632712e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.163897 sec +TOTAL : 3.153505 sec INFO: No Floating Point Exceptions have been reported - 9,061,806,813 cycles # 2.860 GHz - 24,577,933,300 instructions # 2.71 insn per cycle - 3.169521571 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) + 9,049,779,346 cycles # 2.866 GHz + 24,577,971,625 instructions # 2.72 insn per cycle + 3.158944927 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.470565e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.942954e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.942954e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.437117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.904229e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.904229e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.026313 sec +TOTAL : 2.038232 sec INFO: No Floating Point Exceptions have been reported - 5,449,900,362 cycles # 2.684 GHz - 11,251,125,518 instructions # 2.06 insn per cycle - 2.031793334 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) + 5,473,582,641 cycles # 2.680 GHz + 11,251,858,191 instructions # 2.06 insn per cycle + 2.043714380 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.065178e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.648113e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.648113e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.066218e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.650713e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.650713e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.838036 sec +TOTAL : 1.837947 sec INFO: No Floating Point Exceptions have been reported - 4,937,951,228 cycles # 2.680 GHz - 10,556,745,903 instructions # 2.14 insn per cycle - 1.843600256 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) + 4,942,309,563 cycles # 2.682 GHz + 10,557,200,123 instructions # 2.14 insn per cycle + 1.844865568 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.582246e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.775292e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.775292e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.598977e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.794496e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.794496e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.036261 sec +TOTAL : 3.023039 sec INFO: No Floating Point Exceptions have been reported - 5,395,290,125 cycles # 1.774 GHz - 7,793,590,643 instructions # 1.44 insn per cycle - 3.041803947 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) + 5,367,715,100 cycles # 1.773 GHz + 7,793,769,749 instructions # 1.45 insn per cycle + 3.028517366 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 663cad214e..3db0a99453 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -40,177 +40,177 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:24:14 +DATE: 2024-05-16_15:06:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.635282e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156849e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273055e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.591450e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.156507e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275190e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.725039 sec +TOTAL : 0.728834 sec INFO: No Floating Point Exceptions have been reported - 2,706,506,365 cycles # 2.828 GHz - 4,257,792,546 instructions # 1.57 insn per cycle - 1.013725794 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst + 2,711,621,820 cycles # 2.826 GHz + 4,288,575,941 instructions # 1.58 insn per cycle + 1.017933550 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.044850e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.105564e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.105564e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.045774e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.105988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.105988e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.239087 sec +TOTAL : 5.234926 sec INFO: No Floating Point Exceptions have been reported - 15,006,948,586 cycles # 2.862 GHz - 38,376,182,709 instructions # 2.56 insn per cycle - 5.244562024 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 14,996,539,700 cycles # 2.862 GHz + 38,373,492,139 instructions # 2.56 insn per cycle + 5.240540958 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.431762e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.619227e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.619227e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.431303e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.617753e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617753e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.165721 sec +TOTAL : 3.165641 sec INFO: No Floating Point Exceptions have been reported - 9,066,732,600 cycles # 2.860 GHz - 24,579,797,002 instructions # 2.71 insn per cycle - 3.171274433 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) + 9,072,261,960 cycles # 2.862 GHz + 24,578,342,604 instructions # 2.71 insn per cycle + 3.171145800 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.432440e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.902788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.902788e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.460196e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.936686e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.936686e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.040399 sec +TOTAL : 2.030403 sec INFO: No Floating Point Exceptions have been reported - 5,447,679,007 cycles # 2.665 GHz - 11,252,372,098 instructions # 2.07 insn per cycle - 2.046086163 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) + 5,452,336,471 cycles # 2.679 GHz + 11,251,160,510 instructions # 2.06 insn per cycle + 2.035938093 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.015340e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.590774e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.590774e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.063893e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.649981e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.649981e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.852256 sec +TOTAL : 1.838982 sec INFO: No Floating Point Exceptions have been reported - 4,964,036,673 cycles # 2.673 GHz - 10,558,519,067 instructions # 2.13 insn per cycle - 1.857802220 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) + 4,938,631,038 cycles # 2.680 GHz + 10,556,930,414 instructions # 2.14 insn per cycle + 1.844618889 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.586047e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.783729e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.783729e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.589787e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.785615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.785615e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.033201 sec +TOTAL : 3.030446 sec INFO: No Floating Point Exceptions have been reported - 5,388,777,143 cycles # 1.774 GHz - 7,793,579,080 instructions # 1.45 insn per cycle - 3.038681843 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) + 5,385,276,295 cycles # 1.774 GHz + 7,793,583,016 instructions # 1.45 insn per cycle + 3.036161028 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 4514426c62..0caf1293cf 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_08:53:26 +DATE: 2024-05-16_14:35:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.195613e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.184382e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.280839e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.206695e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.183658e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279171e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.523488 sec +TOTAL : 0.521961 sec INFO: No Floating Point Exceptions have been reported - 2,155,707,026 cycles # 2.861 GHz - 3,121,946,682 instructions # 1.45 insn per cycle - 0.811708595 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,148,802,757 cycles # 2.845 GHz + 3,054,152,486 instructions # 1.42 insn per cycle + 0.812117976 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.040424e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.100582e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.100582e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.068168e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.129039e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129039e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.248272 sec +TOTAL : 5.177399 sec INFO: No Floating Point Exceptions have been reported - 15,038,247,077 cycles # 2.864 GHz - 40,101,434,918 instructions # 2.67 insn per cycle - 5.253563437 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) + 15,011,872,798 cycles # 2.897 GHz + 40,100,761,049 instructions # 2.67 insn per cycle + 5.182501125 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.594771e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.800635e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.800635e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.634343e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.844834e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.844834e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.025777 sec +TOTAL : 2.993727 sec INFO: No Floating Point Exceptions have been reported - 8,676,513,577 cycles # 2.863 GHz - 23,671,232,753 instructions # 2.73 insn per cycle - 3.030990589 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2072) (avx2: 0) (512y: 0) (512z: 0) + 8,671,029,072 cycles # 2.892 GHz + 23,670,969,931 instructions # 2.73 insn per cycle + 2.999072752 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2072) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.852634e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.223067e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.223067e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.945254e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.323667e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.323667e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.269618 sec +TOTAL : 2.228700 sec INFO: No Floating Point Exceptions have been reported - 6,075,123,959 cycles # 2.672 GHz - 13,062,379,699 instructions # 2.15 insn per cycle - 2.274773997 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) + 6,081,438,462 cycles # 2.724 GHz + 13,061,002,322 instructions # 2.15 insn per cycle + 2.233958089 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.136902e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.549699e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.549699e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.205594e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.622405e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.622405e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.149296 sec +TOTAL : 2.121856 sec INFO: No Floating Point Exceptions have been reported - 5,777,124,671 cycles # 2.683 GHz - 12,323,213,633 instructions # 2.13 insn per cycle - 2.154509165 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2093) (512y: 294) (512z: 0) + 5,798,891,312 cycles # 2.727 GHz + 12,319,969,769 instructions # 2.12 insn per cycle + 2.127030294 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2093) (512y: 294) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.294251e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.457758e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.457758e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.380432e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.550251e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.550251e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.292168 sec +TOTAL : 3.209519 sec INFO: No Floating Point Exceptions have been reported - 5,808,964,628 cycles # 1.762 GHz - 9,602,534,488 instructions # 1.65 insn per cycle - 3.297534438 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1971) + 5,821,355,640 cycles # 1.812 GHz + 9,603,981,726 instructions # 1.65 insn per cycle + 3.214724733 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1971) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 39eef86b7e..6af05ea7e1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:09:46 +DATE: 2024-05-16_14:51:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.658259e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.170216e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.280112e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.681198e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166116e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276872e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.533646 sec +TOTAL : 0.529611 sec INFO: No Floating Point Exceptions have been reported - 2,162,169,608 cycles # 2.818 GHz - 3,108,077,231 instructions # 1.44 insn per cycle - 0.826343650 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/gcheck.exe -p 2048 256 1 + 2,190,477,637 cycles # 2.832 GHz + 3,135,955,530 instructions # 1.43 insn per cycle + 0.830299558 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.369156e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.450097e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.450097e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.383572e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.466296e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.466296e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.534956 sec +TOTAL : 4.507918 sec INFO: No Floating Point Exceptions have been reported - 12,994,240,018 cycles # 2.862 GHz - 34,384,924,238 instructions # 2.65 insn per cycle - 4.540545327 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) + 13,013,442,526 cycles # 2.884 GHz + 34,387,029,075 instructions # 2.64 insn per cycle + 4.513459426 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.920401e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.054397e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.054397e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.946707e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.083881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.083881e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.700366 sec +TOTAL : 3.667816 sec INFO: No Floating Point Exceptions have been reported - 10,609,557,944 cycles # 2.864 GHz - 24,007,443,501 instructions # 2.26 insn per cycle - 3.706077406 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) + 10,591,846,077 cycles # 2.884 GHz + 24,007,245,790 instructions # 2.27 insn per cycle + 3.673406920 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.424475e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.729374e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.729374e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.532632e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.849376e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.849376e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.479913 sec +TOTAL : 2.423240 sec INFO: No Floating Point Exceptions have been reported - 6,622,429,147 cycles # 2.665 GHz - 12,401,540,743 instructions # 1.87 insn per cycle - 2.485646820 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3154) (512y: 0) (512z: 0) + 6,577,855,979 cycles # 2.709 GHz + 12,401,365,684 instructions # 1.89 insn per cycle + 2.428791768 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3154) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516200 Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.642745e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.976938e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.976938e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.754457e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.104775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.104775e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.368068 sec +TOTAL : 2.314834 sec INFO: No Floating Point Exceptions have been reported - 6,341,758,267 cycles # 2.674 GHz - 11,573,752,166 instructions # 1.83 insn per cycle - 2.373609454 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2690) (512y: 239) (512z: 0) + 6,233,998,487 cycles # 2.688 GHz + 11,576,068,199 instructions # 1.86 insn per cycle + 2.320534715 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516200 Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.624072e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.822346e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.822346e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.687851e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.893233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.893233e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.003083 sec +TOTAL : 2.952132 sec INFO: No Floating Point Exceptions have been reported - 5,336,365,120 cycles # 1.774 GHz - 9,296,649,977 instructions # 1.74 insn per cycle - 3.008817721 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2115) (512y: 282) (512z: 1958) + 5,323,772,693 cycles # 1.802 GHz + 9,296,912,008 instructions # 1.75 insn per cycle + 2.957828928 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2115) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index f2377a3c10..2040ec21eb 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:10:10 +DATE: 2024-05-16_14:52:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.654323e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169281e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279730e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.680230e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168644e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280417e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.531724 sec +TOTAL : 0.532171 sec INFO: No Floating Point Exceptions have been reported - 2,163,666,189 cycles # 2.813 GHz - 3,097,903,315 instructions # 1.43 insn per cycle - 0.825701854 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/gcheck.exe -p 2048 256 1 + 2,169,507,018 cycles # 2.828 GHz + 3,115,355,964 instructions # 1.44 insn per cycle + 0.826043020 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.495064e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.585961e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.585961e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.524819e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.617052e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.617052e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.312328 sec +TOTAL : 4.262483 sec INFO: No Floating Point Exceptions have been reported - 12,359,534,522 cycles # 2.863 GHz - 35,038,045,562 instructions # 2.83 insn per cycle - 4.317909347 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) + 12,358,560,610 cycles # 2.896 GHz + 35,037,446,637 instructions # 2.84 insn per cycle + 4.268207887 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.901600e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.035182e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.035182e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.908483e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.040450e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.040450e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.723663 sec +TOTAL : 3.714757 sec INFO: No Floating Point Exceptions have been reported - 10,672,478,736 cycles # 2.862 GHz - 23,084,196,191 instructions # 2.16 insn per cycle - 3.729445597 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) + 10,745,562,014 cycles # 2.889 GHz + 23,084,374,218 instructions # 2.15 insn per cycle + 3.720383315 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.789631e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.152428e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.152428e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.878271e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.246530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.246530e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.298581 sec +TOTAL : 2.258864 sec INFO: No Floating Point Exceptions have been reported - 6,165,005,368 cycles # 2.677 GHz - 11,957,773,725 instructions # 1.94 insn per cycle - 2.304298716 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2509) (512y: 0) (512z: 0) + 6,151,591,588 cycles # 2.717 GHz + 11,956,808,073 instructions # 1.94 insn per cycle + 2.264473200 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2509) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.925515e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.303447e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.303447e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.958079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.345089e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.345089e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.238206 sec +TOTAL : 2.224234 sec INFO: No Floating Point Exceptions have been reported - 5,999,491,603 cycles # 2.675 GHz - 11,131,162,062 instructions # 1.86 insn per cycle - 2.243685672 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2126) (512y: 174) (512z: 0) + 6,017,653,055 cycles # 2.700 GHz + 11,128,128,624 instructions # 1.85 insn per cycle + 2.229785356 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2126) (512y: 174) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.712662e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.921795e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.921795e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.739650e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.951827e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.951827e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.934330 sec +TOTAL : 2.913360 sec INFO: No Floating Point Exceptions have been reported - 5,221,479,602 cycles # 1.777 GHz - 9,021,223,339 instructions # 1.73 insn per cycle - 2.939902314 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1650) (512y: 208) (512z: 1567) + 5,212,798,448 cycles # 1.786 GHz + 9,020,884,070 instructions # 1.73 insn per cycle + 2.919040069 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1650) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 21615bfbd9..93f412dad4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_08:53:50 +DATE: 2024-05-16_14:35:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.087934e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.711966e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.976794e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.088595e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.705968e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.969781e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.479482 sec +TOTAL : 0.482195 sec INFO: No Floating Point Exceptions have been reported - 1,991,295,837 cycles # 2.814 GHz - 2,882,362,006 instructions # 1.45 insn per cycle - 0.764140602 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,007,920,858 cycles # 2.849 GHz + 2,840,933,430 instructions # 1.41 insn per cycle + 0.763422225 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499749731272 Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.185399e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.256844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.256844e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.200574e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.271569e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.271569e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.884968 sec +TOTAL : 4.850875 sec INFO: No Floating Point Exceptions have been reported - 13,998,763,126 cycles # 2.864 GHz - 38,344,277,820 instructions # 2.74 insn per cycle - 4.889885817 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 14,073,569,281 cycles # 2.899 GHz + 38,343,239,881 instructions # 2.72 insn per cycle + 4.855897587 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.748933e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.130007e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.130007e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.925449e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.332953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.332953e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.296210 sec +TOTAL : 2.217076 sec INFO: No Floating Point Exceptions have been reported - 6,436,971,327 cycles # 2.798 GHz - 15,817,314,373 instructions # 2.46 insn per cycle - 2.301153141 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) + 6,436,588,824 cycles # 2.899 GHz + 15,815,821,412 instructions # 2.46 insn per cycle + 2.222049918 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.801045e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.008233e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.008233e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.963004e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.029520e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.029520e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.278323 sec +TOTAL : 1.256665 sec INFO: No Floating Point Exceptions have been reported - 3,450,636,880 cycles # 2.691 GHz - 7,594,398,635 instructions # 2.20 insn per cycle - 1.283271260 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) + 3,455,760,948 cycles # 2.740 GHz + 7,593,976,565 instructions # 2.20 insn per cycle + 1.261861875 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.432847e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092185e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092185e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.569986e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.110539e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110539e+06 ) sec^-1 MeanMatrixElemValue = ( 
2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.198755 sec +TOTAL : 1.182427 sec INFO: No Floating Point Exceptions have been reported - 3,240,338,452 cycles # 2.694 GHz - 7,202,058,966 instructions # 2.22 insn per cycle - 1.203680701 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) + 3,244,770,474 cycles # 2.734 GHz + 7,203,559,407 instructions # 2.22 insn per cycle + 1.187623854 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.702154e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.416282e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.416282e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.864494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.605662e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.605662e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.651897 sec +TOTAL : 1.614546 sec INFO: No Floating Point Exceptions have been reported - 3,048,638,509 cycles # 1.842 GHz - 5,835,754,619 instructions # 1.91 insn per cycle - 1.657060523 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) + 3,050,749,421 cycles # 1.885 GHz + 5,835,755,685 instructions # 1.91 insn per cycle + 1.619564037 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 140c2bd43a..426db838d7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:19:06 +DATE: 2024-05-16_15:01:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.780357e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.391628e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.391628e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.801236e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.462846e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.462846e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.689034 sec +TOTAL : 0.684862 sec INFO: No Floating Point Exceptions have been reported - 2,591,002,919 cycles # 2.825 GHz - 3,986,567,081 instructions # 1.54 insn per cycle - 0.976058283 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge + 2,586,573,508 cycles # 2.828 GHz + 4,016,406,941 instructions # 1.55 insn per cycle + 0.971565490 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -70,19 +70,19 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499749731272 Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -90,28 +90,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.176806e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.247940e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.247940e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.176436e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.247449e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.247449e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.947848 sec +TOTAL : 4.948763 sec INFO: No Floating Point Exceptions have been reported - 14,177,339,083 cycles # 2.862 GHz - 38,384,281,500 instructions # 2.71 insn per cycle - 4.954293026 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 14,176,104,430 cycles # 2.862 GHz + 38,383,843,895 instructions # 2.71 insn per cycle + 4.955194603 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -119,28 +119,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.805267e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.201006e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.201006e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.809798e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.200764e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.200764e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.318235 sec +TOTAL : 2.315849 sec INFO: No Floating Point Exceptions have been reported - 6,645,221,660 cycles # 2.860 GHz - 16,095,917,759 instructions # 2.42 insn per cycle - 2.324561734 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) + 6,633,418,276 cycles # 2.858 GHz + 16,095,968,093 instructions # 2.43 insn per cycle + 2.322298973 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -148,28 +148,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.666331e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.908244e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.908244e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.679036e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.925640e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.925640e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.344653 sec +TOTAL : 1.342355 sec INFO: No Floating Point Exceptions have been reported - 3,645,388,480 cycles # 2.700 GHz - 7,830,837,428 instructions # 2.15 insn per cycle - 1.351075698 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) + 3,640,592,514 cycles # 2.701 GHz + 7,831,268,120 instructions # 2.15 insn per cycle + 1.348786146 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -177,28 +177,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.154300e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.057045e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.057045e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.163700e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.056629e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056629e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.280564 sec +TOTAL : 1.278871 sec INFO: No Floating Point Exceptions have been reported - 3,432,046,758 cycles # 2.669 GHz - 7,439,136,566 instructions # 2.17 insn per cycle - 1.286864532 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) + 3,437,646,895 cycles # 2.676 GHz + 7,439,842,858 instructions # 2.16 insn per cycle + 1.285386542 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -206,22 +206,22 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.590499e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.279613e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.279613e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.597215e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.292791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.292791e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.727906 sec +TOTAL : 1.727252 sec INFO: No Floating Point Exceptions have been reported - 3,258,297,245 cycles # 1.880 GHz - 6,091,097,760 instructions # 1.87 insn per cycle - 1.734344558 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) + 3,258,697,081 cycles # 1.881 GHz + 6,089,840,836 instructions # 1.87 insn per cycle + 1.733818978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index f941d4d189..884891874e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:30:09 +DATE: 2024-05-16_15:12:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.441294e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.648540e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.972779e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.468958e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.648278e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.971571e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.577869 sec +TOTAL : 0.575797 sec INFO: No Floating Point Exceptions have been reported - 2,259,987,894 cycles # 2.820 GHz - 3,318,785,216 instructions # 1.47 insn per cycle - 0.858245386 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common + 2,271,357,910 cycles # 2.845 GHz + 3,342,640,625 instructions # 1.47 insn per cycle + 0.855647595 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common 
==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499749731272 Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.185586e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.257256e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.257256e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.198151e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.269622e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.269622e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.942195 sec +TOTAL : 4.914840 sec INFO: No Floating Point Exceptions have been reported - 14,162,316,219 cycles # 2.863 GHz - 38,370,272,145 instructions # 2.71 insn per cycle - 4.947449358 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 14,211,276,974 cycles # 2.889 GHz + 38,370,210,397 instructions # 2.70 insn per cycle + 4.920108721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.853985e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.254344e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.254344e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.892733e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.301573e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.301573e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.306547 sec +TOTAL : 2.288479 sec INFO: No Floating Point Exceptions have been reported - 6,614,179,097 cycles # 2.862 GHz - 15,829,353,273 instructions # 2.39 insn per cycle - 2.311880085 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) + 6,608,042,838 cycles # 2.882 GHz + 15,829,158,403 instructions # 2.40 insn per cycle + 2.293691008 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.781038e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.006761e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.006761e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.919042e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.023820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.023820e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.338691 sec +TOTAL : 1.319201 sec INFO: No Floating Point Exceptions have been reported - 3,618,619,626 cycles # 2.694 GHz - 7,578,518,232 instructions # 2.09 insn per cycle - 1.343966759 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) + 3,618,631,378 cycles # 2.734 GHz + 7,578,247,859 instructions # 2.09 insn per cycle + 1.324366743 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.447372e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.094056e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.094056e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.492699e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.100151e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100151e+06 ) sec^-1 
MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.255442 sec +TOTAL : 1.250231 sec INFO: No Floating Point Exceptions have been reported - 3,405,820,822 cycles # 2.703 GHz - 7,152,441,430 instructions # 2.10 insn per cycle - 1.260812980 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) + 3,418,366,623 cycles # 2.724 GHz + 7,152,275,486 instructions # 2.09 insn per cycle + 1.255758340 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.714568e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.430604e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.430604e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.830732e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.562097e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.562097e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.707472 sec +TOTAL : 1.681529 sec INFO: No Floating Point Exceptions have been reported - 3,213,810,412 cycles # 1.877 GHz - 5,785,708,337 instructions # 1.80 insn per cycle - 1.712770365 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) + 3,218,452,038 cycles # 1.909 GHz + 5,786,270,960 instructions # 1.80 insn per cycle + 1.686847993 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index 5dd9924337..9b5852a8c1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:27:22 +DATE: 2024-05-16_15:09:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.457342e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.645322e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.969571e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.497286e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.653761e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.976765e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.520144 sec +TOTAL : 0.520499 sec INFO: No Floating Point Exceptions have been reported - 2,098,800,484 cycles # 2.818 GHz - 3,302,360,884 instructions # 1.57 insn per cycle - 0.802331353 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst + 2,122,949,824 cycles # 2.819 GHz + 3,308,605,661 instructions # 1.56 insn per cycle + 0.811337951 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst 
==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499749731272 Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.183495e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.256672e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.256672e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.187282e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.258952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.258952e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.891525 sec +TOTAL : 4.881435 sec INFO: No Floating Point Exceptions have been reported - 14,004,521,708 cycles # 2.861 GHz - 38,341,043,198 instructions # 2.74 insn per cycle - 4.896650903 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 13,993,887,356 cycles # 2.864 GHz + 38,340,879,445 instructions # 2.74 insn per cycle + 4.886765699 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.861114e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.259842e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.259842e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.866184e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.266559e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.266559e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.245870 sec +TOTAL : 2.243695 sec INFO: No Floating Point Exceptions have been reported - 6,439,615,857 cycles # 2.862 GHz - 15,815,454,278 instructions # 2.46 insn per cycle - 2.251122058 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) + 6,437,628,216 cycles # 2.863 GHz + 15,815,570,005 instructions # 2.46 insn per cycle + 2.248941783 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.808799e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.009394e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.009394e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.699018e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.949673e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.949673e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.278168 sec +TOTAL : 1.293092 sec INFO: No Floating Point Exceptions have been reported - 3,449,152,774 cycles # 2.689 GHz - 7,593,321,212 instructions # 2.20 insn per cycle - 1.283559283 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) + 3,447,035,685 cycles # 2.657 GHz + 7,594,377,345 instructions # 2.20 insn per cycle + 1.298317015 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.428242e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.091809e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.091809e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.410196e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.089229e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089229e+06 ) sec^-1 
MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.200122 sec +TOTAL : 1.202332 sec INFO: No Floating Point Exceptions have been reported - 3,241,815,360 cycles # 2.691 GHz - 7,203,446,508 instructions # 2.22 insn per cycle - 1.205490381 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) + 3,248,094,322 cycles # 2.691 GHz + 7,201,883,054 instructions # 2.22 insn per cycle + 1.207739630 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.709652e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.425629e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.425629e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.682713e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.392370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.392370e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.651222 sec +TOTAL : 1.657517 sec INFO: No Floating Point Exceptions have been reported - 3,052,193,399 cycles # 1.843 GHz - 5,835,644,896 instructions # 1.91 insn per cycle - 1.656436466 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) + 3,060,341,406 cycles # 1.842 GHz + 5,836,262,166 instructions # 1.91 insn per cycle + 1.662864711 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index a158958f69..7e3b1fa48e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -40,177 +40,177 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:24:38 +DATE: 2024-05-16_15:06:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.492616e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.639082e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.961805e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.502594e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.623050e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.943883e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.626891 sec +TOTAL : 0.628666 sec INFO: No Floating Point Exceptions have been reported - 2,398,364,796 cycles # 2.824 GHz - 3,748,950,340 instructions # 1.56 insn per cycle - 0.905670831 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst + 2,403,264,425 cycles # 2.820 GHz + 3,734,811,294 instructions # 1.55 insn per cycle + 0.909767197 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499749731272 Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.185825e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.257444e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.257444e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.185686e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.257300e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.257300e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.885821 sec +TOTAL : 4.884819 sec INFO: No Floating Point Exceptions have been reported - 13,998,111,113 cycles # 2.863 GHz - 38,343,247,328 instructions # 2.74 insn per cycle - 4.891063105 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 13,995,449,913 cycles # 2.863 GHz + 38,340,978,131 instructions # 2.74 insn per cycle + 4.889991891 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.683089e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.053456e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.053456e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.864053e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.263128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.263128e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.328350 sec +TOTAL : 2.244540 sec INFO: No Floating Point Exceptions have been reported - 6,446,914,878 cycles # 2.764 GHz - 15,817,217,969 instructions # 2.45 insn per cycle - 2.333673502 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) + 6,436,419,349 cycles # 2.862 GHz + 15,815,556,279 instructions # 2.46 insn per cycle + 2.249779623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.620606e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.875671e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.875671e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.799961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.008748e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.008748e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.305275 sec +TOTAL : 1.279634 sec INFO: No Floating Point Exceptions have been reported - 3,464,948,943 cycles # 2.645 GHz - 7,594,902,602 instructions # 2.19 insn per cycle - 1.310883359 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) + 3,447,592,643 cycles # 2.685 GHz + 7,593,708,789 instructions # 2.20 insn per cycle + 1.284877623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.424530e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.091692e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.091692e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.434984e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092289e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092289e+06 ) sec^-1 
MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.200978 sec +TOTAL : 1.198937 sec INFO: No Floating Point Exceptions have been reported - 3,245,600,515 cycles # 2.692 GHz - 7,202,294,627 instructions # 2.22 insn per cycle - 1.206324794 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) + 3,242,375,801 cycles # 2.694 GHz + 7,202,509,960 instructions # 2.22 insn per cycle + 1.204245270 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.714096e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.429171e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.429171e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.713311e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.432943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.432943e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.650070 sec +TOTAL : 1.650810 sec INFO: No Floating Point Exceptions have been reported - 3,052,618,225 cycles # 1.845 GHz - 5,835,036,219 instructions # 1.91 insn per cycle - 1.655525339 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) + 3,050,285,995 cycles # 1.842 GHz + 5,834,789,164 instructions # 1.91 insn per cycle + 1.656446986 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index b53b6ca217..3e123e6fd7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_08:54:09 +DATE: 2024-05-16_14:36:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.094541e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.761342e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.038400e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.096553e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.763289e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.037690e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.480529 sec +TOTAL : 0.480283 sec INFO: No Floating Point Exceptions have been reported - 2,029,736,118 cycles # 2.841 GHz - 2,894,990,051 instructions # 1.43 insn per cycle - 0.771583749 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,036,711,218 cycles # 2.852 GHz + 2,918,453,967 instructions # 1.43 insn per cycle + 0.771336406 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499749731272 Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.174936e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.244771e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.244771e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.166079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.236793e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.236793e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.908019 sec +TOTAL : 4.926463 sec INFO: No Floating Point Exceptions have been reported - 14,301,494,173 cycles # 2.912 GHz - 39,833,565,583 instructions # 2.79 insn per cycle - 4.913066260 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) + 14,320,299,267 cycles # 2.905 GHz + 39,836,243,439 instructions # 2.78 insn per cycle + 4.931482509 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199028000236 Relative difference = 4.790961076489297e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.740661e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.295355e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.295355e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.723514e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.285593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.285593e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.913985 sec +TOTAL : 1.919156 sec INFO: No Floating Point Exceptions have been reported - 5,573,020,624 cycles # 2.905 GHz - 15,285,382,071 instructions # 2.74 insn per cycle - 1.918973539 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2474) (avx2: 0) (512y: 0) (512z: 0) + 5,582,245,803 cycles # 2.902 GHz + 15,285,424,302 instructions # 2.74 insn per cycle + 1.924109376 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2474) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.280958e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.903297e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.903297e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.349024e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.991002e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.991002e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.755267 sec +TOTAL : 1.737963 sec INFO: No Floating Point Exceptions have been reported - 4,809,719,961 cycles # 2.734 GHz - 9,735,072,586 instructions # 2.02 insn per cycle - 1.760266597 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3708) (512y: 0) (512z: 0) + 4,749,494,972 cycles # 2.726 GHz + 9,735,095,064 instructions # 2.05 insn per cycle + 1.742978161 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3708) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182108197361 Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.569354e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.248087e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.248087e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.536931e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.219273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.219273e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.681540 sec +TOTAL : 1.690263 sec INFO: No Floating Point Exceptions have been reported - 4,617,673,944 cycles # 2.739 GHz - 9,326,385,797 instructions # 2.02 insn per cycle - 1.686673769 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3496) (512y: 0) (512z: 0) + 4,623,322,631 cycles # 2.728 GHz + 9,325,575,279 instructions # 2.02 insn per cycle + 1.695318457 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3496) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182108197361 Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.608630e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.095881e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.095881e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.572579e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.052133e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.052133e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.956075 sec +TOTAL : 1.968034 sec INFO: No Floating Point Exceptions have been reported - 3,655,224,280 cycles # 1.865 GHz - 7,036,072,202 instructions # 1.92 insn per cycle - 1.961051327 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2610) (512y: 12) (512z: 2220) + 3,660,831,684 cycles # 1.856 GHz + 7,034,974,988 instructions # 1.92 insn per cycle + 1.973212700 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2610) (512y: 12) (512z: 2220) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183459779248 Relative difference = 1.7053177021099307e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 04d36cf6de..c7eded0fc2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:10:34 +DATE: 2024-05-16_14:52:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.477193e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.654658e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.976661e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.456356e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.657836e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.983561e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.489832 sec +TOTAL : 0.487671 sec INFO: No Floating Point Exceptions have been reported - 2,011,185,622 cycles # 2.813 GHz - 2,828,692,272 instructions # 1.41 insn per cycle - 0.773489784 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/gcheck.exe -p 2048 256 1 + 2,030,099,363 cycles # 2.844 GHz + 2,856,891,631 instructions # 1.41 insn per cycle + 0.771313393 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499749731272 Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.401235e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.488036e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.488036e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.397227e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.481743e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.481743e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.455338 sec +TOTAL : 4.460576 sec INFO: No Floating Point Exceptions have been reported - 12,589,426,076 cycles # 2.823 GHz - 34,372,775,517 instructions # 2.73 insn per cycle - 4.460794985 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) + 12,588,647,411 cycles # 2.819 GHz + 34,372,288,545 instructions # 2.73 insn per cycle + 4.465853868 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.156094e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.608144e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.608144e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.225217e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.687950e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.687950e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.122636 sec +TOTAL : 2.095644 sec INFO: No Floating Point Exceptions have been reported - 6,090,571,230 cycles # 2.863 GHz - 14,860,717,951 instructions # 2.44 insn per cycle - 2.128007707 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) + 6,085,238,066 cycles # 2.897 GHz + 14,860,574,019 instructions # 2.44 insn per cycle + 2.101017455 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193803280592 Relative difference = 1.8746278463897685e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.999467e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.786326e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.786326e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.969640e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.750011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.750011e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.586058 sec +TOTAL : 1.592133 sec INFO: No Floating Point Exceptions have been reported - 4,262,406,214 cycles # 2.680 GHz - 9,028,780,676 instructions # 2.12 insn per cycle - 1.591291867 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4443) (512y: 0) (512z: 0) + 4,316,607,801 cycles # 2.703 GHz + 9,028,975,402 instructions # 2.09 insn per cycle + 1.597664902 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4443) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181999931112 Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.135753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.962770e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.962770e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.187100e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.023996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.023996e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.556995 sec +TOTAL : 1.545422 sec INFO: No Floating Point Exceptions have been reported - 4,192,442,485 cycles # 2.685 GHz - 8,663,376,318 instructions # 2.07 insn per cycle - 1.562443081 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4243) (512y: 0) (512z: 0) + 4,204,195,380 cycles # 2.712 GHz + 8,663,569,400 instructions # 2.06 insn per cycle + 1.550927334 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4243) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181999931112 Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.150850e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.567788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.567788e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.251438e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.680453e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.680453e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.124034 sec +TOTAL : 2.083936 sec INFO: No Floating Point Exceptions have been reported - 3,834,225,268 cycles # 1.801 GHz - 7,808,561,781 instructions # 2.04 insn per cycle - 2.129432875 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4424) (512y: 0) (512z: 2555) + 3,833,998,104 cycles # 1.836 GHz + 7,808,361,622 instructions # 2.04 insn per cycle + 2.089489123 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4424) (512y: 0) (512z: 2555) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183246739209 Relative difference = 1.6003107281264138e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 8244fa7021..aad34f68a4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:10:54 +DATE: 2024-05-16_14:53:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.482985e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.705623e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.042225e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.520611e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.721194e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.056652e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.488772 sec +TOTAL : 0.485221 sec INFO: No Floating Point Exceptions have been reported - 2,008,491,626 cycles # 2.813 GHz - 2,851,254,095 instructions # 1.42 insn per cycle - 0.772618257 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/gcheck.exe -p 2048 256 1 + 2,023,639,378 cycles # 2.841 GHz + 2,891,046,466 instructions # 1.43 insn per cycle + 0.769493206 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499749731272 Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.520451e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.617692e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.617692e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.614708e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.719370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.719370e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.248173 sec +TOTAL : 4.097789 sec INFO: No Floating Point Exceptions have been reported - 11,756,019,723 cycles # 2.765 GHz - 35,109,223,793 instructions # 2.99 insn per cycle - 4.253499726 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) + 11,755,034,517 cycles # 2.866 GHz + 35,108,588,793 instructions # 2.99 insn per cycle + 4.103114971 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.271716e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.745050e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.745050e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.332294e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.809853e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.809853e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.077721 sec +TOTAL : 2.053683 sec INFO: No Floating Point Exceptions have been reported - 5,957,421,378 cycles # 2.861 GHz - 14,471,556,150 instructions # 2.43 insn per cycle - 2.083048982 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) + 5,951,415,517 cycles # 2.891 GHz + 14,470,123,335 instructions # 2.43 insn per cycle + 2.059025817 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193583255634 Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.214184e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.054314e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.054314e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.326940e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.191185e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.191185e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.541457 sec +TOTAL : 1.518155 sec INFO: No Floating Point Exceptions have been reported - 4,148,997,635 cycles # 2.683 GHz - 8,874,807,692 instructions # 2.14 insn per cycle - 1.546990735 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3574) (512y: 0) (512z: 0) + 4,152,217,913 cycles # 2.727 GHz + 8,874,854,960 instructions # 2.14 insn per cycle + 1.523530355 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3574) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182107033208 Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.269929e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.124022e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.124022e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.326335e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.192412e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.192412e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.529902 sec +TOTAL : 1.518142 sec INFO: No Floating Point Exceptions have been reported - 4,119,777,235 cycles # 2.685 GHz - 8,411,495,965 instructions # 2.04 insn per cycle - 1.535269339 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3319) (512y: 0) (512z: 0) + 4,138,145,120 cycles # 2.717 GHz + 8,411,511,000 instructions # 2.03 insn per cycle + 1.523559219 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3319) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182107033208 Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.237727e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.663628e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.663628e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.337364e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.777859e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.777859e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.090144 sec +TOTAL : 2.053123 sec INFO: No Floating Point Exceptions have been reported - 3,776,443,658 cycles # 1.803 GHz - 7,702,638,011 instructions # 2.04 insn per cycle - 2.095484503 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3440) (512y: 0) (512z: 2107) + 3,784,038,038 cycles # 1.840 GHz + 7,702,433,783 instructions # 2.04 insn per cycle + 2.058532499 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3440) (512y: 0) (512z: 2107) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183204829693 Relative difference = 1.5796536184903122e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 79ad6a8d0a..ff88d5da2d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_08:54:30 +DATE: 2024-05-16_14:36:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.192411e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.181219e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.276445e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.198792e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.180605e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275668e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.523408 sec +TOTAL : 0.521467 sec INFO: No Floating Point Exceptions have been reported - 2,152,861,783 cycles # 2.850 GHz - 3,076,817,120 instructions # 1.43 insn per cycle - 0.813114486 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,143,649,339 cycles # 2.843 GHz + 3,098,162,725 instructions # 1.45 insn per cycle + 0.810608393 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063423243874 Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.038755e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.097734e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.097734e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.033714e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.092456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.092456e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.250907 sec +TOTAL : 5.262850 sec INFO: No Floating Point Exceptions have been reported - 15,281,663,127 cycles # 2.908 GHz - 38,577,348,657 instructions # 2.52 insn per cycle - 5.256051533 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) + 15,278,986,093 cycles # 2.901 GHz + 38,575,389,182 instructions # 2.52 insn per cycle + 5.268064562 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.526101e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.725280e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.725280e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.527314e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.723139e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.723139e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.081282 sec +TOTAL : 3.080390 sec INFO: No Floating Point Exceptions have been reported - 8,974,011,511 cycles # 2.909 GHz - 24,225,398,469 instructions # 2.70 insn per cycle - 3.086510757 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) + 8,961,614,258 cycles # 2.906 GHz + 24,226,315,758 instructions # 2.70 insn per cycle + 3.085434765 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.577179e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.066553e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.066553e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.613394e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.100134e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.100134e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.989604 sec +TOTAL : 1.976346 sec INFO: No Floating Point Exceptions have been reported - 5,407,138,280 cycles # 2.712 GHz - 11,277,258,107 instructions # 2.09 insn per cycle - 1.994773312 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) + 5,394,338,439 cycles # 2.724 GHz + 11,277,527,499 instructions # 2.09 insn per cycle + 1.981499886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.339166e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.965304e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.965304e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.276948e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.897611e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.897611e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.761508 sec +TOTAL : 1.778784 sec INFO: No Floating Point Exceptions have been reported - 4,844,335,136 cycles # 2.743 GHz - 10,524,652,969 instructions # 2.17 insn per cycle - 1.766651228 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) + 4,855,499,941 cycles # 2.723 GHz + 10,526,571,188 instructions # 2.17 insn per cycle + 1.784170390 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.829545e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.049389e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.049389e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.815864e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.036087e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.036087e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.845936 sec +TOTAL : 2.856419 sec INFO: No Floating Point Exceptions have been reported - 5,212,328,534 cycles # 1.829 GHz - 7,603,724,154 instructions # 1.46 insn per cycle - 2.851172084 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) + 5,199,981,370 cycles # 1.818 GHz + 7,603,665,117 instructions # 1.46 insn per cycle + 2.861804972 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 0e63a3f59f..1d76304278 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_08:54:53 +DATE: 2024-05-16_14:36:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.193864e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.183193e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279310e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.208651e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.184994e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280716e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.521792 sec +TOTAL : 0.522551 sec INFO: No Floating Point Exceptions have been reported - 2,149,548,104 cycles # 2.853 GHz - 3,068,777,900 instructions # 1.43 insn per cycle - 0.810473258 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,145,230,616 cycles # 2.840 GHz + 3,093,123,772 instructions # 1.44 insn per cycle + 0.812278354 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063423243874 Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.026547e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.084545e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.084545e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.021911e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.079930e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.079930e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.282085 sec +TOTAL : 5.294031 sec INFO: No Floating Point Exceptions have been reported - 15,346,439,759 cycles # 2.903 GHz - 40,370,397,466 instructions # 2.63 insn per cycle - 5.287522626 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) + 15,341,153,400 cycles # 2.896 GHz + 40,370,282,827 instructions # 2.63 insn per cycle + 5.299425936 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.722569e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.941157e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.941157e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.710012e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.926494e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.926494e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.924371 sec +TOTAL : 2.935201 sec INFO: No Floating Point Exceptions have been reported - 8,527,497,888 cycles # 2.912 GHz - 23,255,313,092 instructions # 2.73 insn per cycle - 2.929888638 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) + 8,515,314,447 cycles # 2.897 GHz + 23,253,613,819 instructions # 2.73 insn per cycle + 2.940392108 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.830249e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.195018e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.195018e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.780066e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.132607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.132607e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.278721 sec +TOTAL : 2.302191 sec INFO: No Floating Point Exceptions have been reported - 6,249,543,045 cycles # 2.737 GHz - 12,962,507,292 instructions # 2.07 insn per cycle - 2.283921258 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) + 6,262,262,467 cycles # 2.715 GHz + 12,962,490,062 instructions # 2.07 insn per cycle + 2.307689771 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.098042e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.497971e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.497971e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.109643e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.511847e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.511847e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.164012 sec +TOTAL : 2.160493 sec INFO: No Floating Point Exceptions have been reported - 5,901,625,740 cycles # 2.721 GHz - 12,240,479,816 instructions # 2.07 insn per cycle - 2.169281960 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) + 5,903,466,716 cycles # 2.727 GHz + 12,238,680,442 instructions # 2.07 insn per cycle + 2.165768560 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.529482e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.716465e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.716465e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.507940e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.694154e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.694154e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.079417 sec +TOTAL : 3.097205 sec INFO: No Floating Point Exceptions have been reported - 5,590,192,539 cycles # 1.813 GHz - 8,743,868,009 instructions # 1.56 insn per cycle - 3.084800754 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) + 5,614,268,818 cycles # 1.810 GHz + 8,744,074,840 instructions # 1.56 insn per cycle + 3.102417520 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 12104a4304..1d7490861d 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_08:55:17 +DATE: 2024-05-16_14:37:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.008388e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.051803e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.064529e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.992211e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.047041e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.061161e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.468501 sec +TOTAL : 0.469322 sec INFO: No Floating Point Exceptions have been reported - 1,972,372,372 cycles # 2.847 GHz - 2,839,654,276 instructions # 1.44 insn per cycle - 0.749356899 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 + 1,970,950,644 cycles # 2.853 GHz + 2,836,233,202 instructions # 1.44 insn per cycle + 0.747868437 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.126797e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.326776e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.338237e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.129686e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.329949e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.341716e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.602601 sec +TOTAL : 0.607217 sec INFO: No Floating Point Exceptions have been reported - 2,374,378,503 cycles # 2.828 GHz - 3,620,992,812 instructions # 1.53 insn per cycle - 0.897814922 seconds time elapsed + 2,397,125,482 cycles # 2.825 GHz + 3,658,262,516 instructions # 1.53 insn per cycle + 0.909559944 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213684418649 Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.422476e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.434911e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.434911e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.379379e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.391311e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391311e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.787770 sec +TOTAL : 6.910347 sec INFO: No Floating Point Exceptions have been reported - 19,779,847,754 cycles # 2.913 GHz - 59,609,434,037 instructions # 3.01 insn per cycle - 6.791923869 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) + 19,789,020,586 cycles # 2.863 GHz + 59,609,829,111 instructions # 3.01 insn per cycle + 6.914699001 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.626684e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.671405e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.671405e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.619966e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.665049e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.665049e+04 ) sec^-1 
MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.564782 sec +TOTAL : 3.569460 sec INFO: No Floating Point Exceptions have been reported - 10,370,410,191 cycles # 2.907 GHz - 30,676,006,440 instructions # 2.96 insn per cycle - 3.569040397 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) + 10,374,266,250 cycles # 2.904 GHz + 30,674,256,165 instructions # 2.96 insn per cycle + 3.573646642 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.224141e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.397286e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.397286e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.120184e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.293257e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.293257e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.799480 sec +TOTAL : 1.820051 sec INFO: No Floating Point Exceptions have been reported - 4,894,819,750 cycles # 2.715 GHz - 11,018,842,048 instructions # 2.25 insn per cycle - 1.803612811 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) + 4,901,380,147 cycles # 2.688 GHz + 11,019,047,598 instructions # 2.25 insn per cycle + 1.824311195 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.032219e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.053358e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.053358e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.028182e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.049956e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.049956e+05 ) sec^-1 
MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.610689 sec +TOTAL : 1.616748 sec INFO: No Floating Point Exceptions have been reported - 4,375,444,673 cycles # 2.711 GHz - 10,295,955,997 instructions # 2.35 insn per cycle - 1.614893060 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) + 4,378,615,331 cycles # 2.702 GHz + 10,296,117,856 instructions # 2.35 insn per cycle + 1.621129053 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.058331e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.161062e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.161062e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.954224e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.056280e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.056280e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.347148 sec +TOTAL : 2.381742 sec INFO: No Floating Point Exceptions have been reported - 4,106,345,128 cycles # 1.747 GHz - 5,842,279,382 instructions # 1.42 insn per cycle - 2.351375618 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) + 4,108,596,097 cycles # 1.723 GHz + 5,842,404,115 instructions # 1.42 insn per cycle + 2.385936782 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index aa42dcbcf3..45a1ef164b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_09:19:26 +DATE: 2024-05-16_15:01:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.556239e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.826539e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.826539e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.535443e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.780857e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.780857e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.500480 sec +TOTAL : 0.503670 sec INFO: No Floating Point Exceptions have been reported - 2,010,386,751 cycles # 2.815 GHz - 3,060,121,362 instructions # 1.52 insn per cycle - 0.771215563 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge + 2,012,376,201 cycles # 2.812 GHz + 3,006,218,540 instructions # 1.49 insn per cycle + 0.774572160 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -70,7 +70,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -79,29 +79,29 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.633980e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.588534e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.588534e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.606024e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.624765e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.624765e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.837499 sec +TOTAL : 0.841754 sec INFO: No Floating Point Exceptions have been reported - 3,090,776,663 cycles # 2.832 GHz - 4,980,733,695 instructions # 1.61 insn per cycle - 1.149431601 seconds time elapsed + 3,099,668,806 cycles # 2.832 GHz + 4,993,276,525 instructions # 1.61 insn per cycle + 1.155254157 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213684418649 Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -109,28 +109,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.381649e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.393658e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.393658e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.380068e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.392068e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.392068e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.910820 sec +TOTAL : 6.915910 sec INFO: No Floating Point Exceptions have been reported - 19,788,417,607 cycles # 2.862 GHz - 59,611,607,061 instructions # 3.01 insn per cycle - 6.915222633 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) + 19,806,579,322 cycles # 2.863 GHz + 59,611,012,266 instructions # 3.01 insn per cycle + 6.920308116 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -138,28 +138,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.548645e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.593750e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.593750e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.550339e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.594733e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.594733e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.634125 sec +TOTAL : 3.631431 sec INFO: No Floating Point Exceptions have been reported - 10,404,915,587 cycles # 2.860 GHz - 30,722,373,622 instructions # 2.95 insn per cycle - 3.638600833 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) + 10,404,134,292 cycles # 2.862 GHz + 30,722,305,980 instructions # 2.95 insn per cycle + 3.635916319 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -167,28 +167,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.002692e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.176059e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.176059e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.991824e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.166141e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.166141e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.851700 sec +TOTAL : 1.853778 sec INFO: No Floating Point Exceptions have been reported - 4,942,815,311 cycles # 2.664 GHz - 11,068,193,942 instructions # 2.24 insn per cycle - 1.856145679 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) + 4,943,570,309 cycles # 2.661 GHz + 11,067,752,215 instructions # 2.24 insn per cycle + 1.858370590 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -196,28 +196,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.006966e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.028368e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.028368e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.005140e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.026682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.026682e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.658669 sec +TOTAL : 1.662867 sec INFO: No Floating Point Exceptions have been reported - 4,425,875,596 cycles # 2.662 GHz - 10,345,271,959 instructions # 2.34 insn per cycle - 1.663069368 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) + 4,426,260,539 cycles # 2.656 GHz + 10,346,882,831 instructions # 2.34 insn per cycle + 1.667431238 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -225,22 +225,22 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.826905e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.925862e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.925862e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.832038e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.932754e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.932754e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.434578 sec +TOTAL : 2.431611 sec INFO: No Floating Point Exceptions have been reported - 4,152,358,413 cycles # 1.703 GHz - 5,882,259,715 instructions # 1.42 insn per cycle - 2.439137376 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) + 4,145,808,516 cycles # 1.702 GHz + 5,880,428,508 instructions # 1.42 insn per cycle + 2.436095886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index f0e031f62b..c8d4c1d012 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_08:55:42 +DATE: 2024-05-16_14:37:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.987492e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.046583e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.059392e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.984938e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.044546e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056865e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.470179 sec +TOTAL : 0.468609 sec INFO: No Floating Point Exceptions have been reported - 1,973,441,131 cycles # 2.857 GHz - 2,842,887,834 instructions # 1.44 insn per cycle - 0.747905498 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 + 1,981,002,182 cycles # 2.846 GHz + 2,842,945,772 instructions # 1.44 insn per cycle + 0.752497111 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.118345e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.315309e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326550e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.119070e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.315352e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326681e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.598306 sec +TOTAL : 0.601408 sec INFO: No Floating Point Exceptions have been reported - 2,389,171,542 cycles # 2.862 GHz - 3,615,113,401 instructions # 1.51 insn per cycle - 0.893887406 seconds time elapsed + 2,383,936,937 cycles # 2.851 GHz + 3,651,729,049 instructions # 1.53 insn per cycle + 0.896728355 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213684418649 Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.447561e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.460023e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.460023e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.454763e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.467389e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.467389e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.718011 sec +TOTAL : 6.698212 sec INFO: No Floating Point Exceptions have been reported - 19,539,796,204 cycles # 2.908 GHz - 58,797,374,199 instructions # 3.01 insn per cycle - 6.722231436 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) + 19,500,935,732 cycles # 2.911 GHz + 58,799,003,967 instructions # 3.02 insn per cycle + 6.702449206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.697416e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.742995e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.742995e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.669930e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.715854e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.715854e+04 ) sec^-1 
MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.510858 sec +TOTAL : 3.531511 sec INFO: No Floating Point Exceptions have been reported - 10,222,494,794 cycles # 2.909 GHz - 30,345,546,614 instructions # 2.97 insn per cycle - 3.515028540 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) + 10,228,095,464 cycles # 2.894 GHz + 30,347,180,891 instructions # 2.97 insn per cycle + 3.535798492 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.916344e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.077035e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.077035e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.789972e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.950829e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.950829e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.860593 sec +TOTAL : 1.887432 sec INFO: No Floating Point Exceptions have been reported - 5,052,529,312 cycles # 2.711 GHz - 11,483,367,724 instructions # 2.27 insn per cycle - 1.864806185 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) + 5,055,118,079 cycles # 2.674 GHz + 11,484,444,983 instructions # 2.27 insn per cycle + 1.891612421 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.703300e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.891625e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.891625e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.667837e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.860484e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.860484e+04 ) sec^-1 
MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.711676 sec +TOTAL : 1.718788 sec INFO: No Floating Point Exceptions have been reported - 4,651,953,328 cycles # 2.712 GHz - 10,841,908,367 instructions # 2.33 insn per cycle - 1.715874244 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) + 4,655,858,880 cycles # 2.704 GHz + 10,842,096,596 instructions # 2.33 insn per cycle + 1.722993406 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.026909e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.128215e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.128215e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.981237e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.082937e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.082937e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.356894 sec +TOTAL : 2.372571 sec INFO: No Floating Point Exceptions have been reported - 4,119,891,537 cycles # 1.746 GHz - 6,106,228,072 instructions # 1.48 insn per cycle - 2.361085439 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) + 4,129,142,877 cycles # 1.738 GHz + 6,106,185,085 instructions # 1.48 insn per cycle + 2.376879303 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 5c6c882772..e4bc7cf2cc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_08:56:08 +DATE: 2024-05-16_14:38:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.546352e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.304110e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.387480e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.514552e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.271085e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.366020e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.452126 sec +TOTAL : 0.450662 sec INFO: No Floating Point Exceptions have been reported - 1,893,414,828 cycles # 2.840 GHz - 2,700,627,762 instructions # 1.43 insn per cycle - 0.725425446 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 + 1,888,418,045 cycles # 2.834 GHz + 2,686,004,303 instructions # 1.42 insn per cycle + 0.722549365 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 254 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.427477e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.467802e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.537028e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.424662e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.459806e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.527254e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.495991 sec +TOTAL : 0.495261 sec INFO: No Floating Point Exceptions have been reported - 2,097,978,612 cycles # 2.852 GHz - 3,016,663,704 instructions # 1.44 insn per cycle - 0.792464663 seconds time elapsed + 2,099,817,827 cycles # 2.862 GHz + 2,990,738,948 instructions # 1.42 insn per cycle + 0.790419941 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.412608e+00 Avg ME (F77/GPU) = 1.4132214346515752 Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.509920e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.522973e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.522973e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.505220e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.518346e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.518346e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.550589 sec +TOTAL : 6.562288 sec INFO: No Floating Point Exceptions have been reported - 19,076,732,071 cycles # 2.911 GHz - 58,959,260,000 instructions # 3.09 insn per cycle - 6.554604317 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) + 19,080,957,547 cycles # 2.906 GHz + 58,959,648,789 instructions # 3.09 insn per cycle + 6.566573323 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412986e+00 Avg ME (F77/C++) = 1.4129858051842916 Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.236135e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.381348e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.381348e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.204155e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.352745e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.352745e+04 ) sec^-1 
MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.010730 sec +TOTAL : 2.018056 sec INFO: No Floating Point Exceptions have been reported - 5,857,272,878 cycles # 2.908 GHz - 16,694,397,816 instructions # 2.85 insn per cycle - 2.014749871 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) + 5,861,245,947 cycles # 2.899 GHz + 16,693,370,121 instructions # 2.85 insn per cycle + 2.022246601 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412987e+00 Avg ME (F77/C++) = 1.4129865669244737 Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.764087e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.827666e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.827666e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.747206e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.811751e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.811751e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.950207 sec +TOTAL : 0.959718 sec INFO: No Floating Point Exceptions have been reported - 2,597,080,381 cycles # 2.723 GHz - 5,980,321,548 instructions # 2.30 insn per cycle - 0.954318451 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) + 2,597,973,759 cycles # 2.697 GHz + 5,979,816,432 instructions # 2.30 insn per cycle + 0.963957244 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.955925e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.034043e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.034043e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.928786e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.008064e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.008064e+05 ) sec^-1 
MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.858903 sec +TOTAL : 0.871454 sec INFO: No Floating Point Exceptions have been reported - 2,340,800,245 cycles # 2.714 GHz - 5,601,926,177 instructions # 2.39 insn per cycle - 0.863041825 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) + 2,346,801,151 cycles # 2.682 GHz + 5,601,970,539 instructions # 2.39 insn per cycle + 0.875813732 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.446888e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.489350e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.489350e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.412327e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.455439e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.455439e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.155852 sec +TOTAL : 1.184240 sec INFO: No Floating Point Exceptions have been reported - 2,054,158,021 cycles # 1.772 GHz - 3,333,717,631 instructions # 1.62 insn per cycle - 1.159901647 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) + 2,059,493,323 cycles # 1.734 GHz + 3,333,364,881 instructions # 1.62 insn per cycle + 1.188531798 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 43c5e65f2a..d735dc5897 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_09:19:52 +DATE: 2024-05-16_15:02:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.708965e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.117187e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.117187e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.750186e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.085490e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.085490e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.465757 sec +TOTAL : 0.469338 sec INFO: No Floating Point Exceptions have been reported - 1,910,294,983 cycles # 2.814 GHz - 2,837,512,357 instructions # 1.49 insn per cycle - 0.735479351 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge + 1,918,362,944 cycles # 2.804 GHz + 2,834,169,916 instructions # 1.48 insn per cycle + 0.742178075 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -70,7 +70,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -79,29 +79,29 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.531814e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.569827e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.569827e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.524122e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.570005e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.570005e+07 ) sec^-1 MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.650442 sec +TOTAL : 0.651816 sec INFO: No Floating Point Exceptions have been reported - 2,527,978,336 cycles # 2.826 GHz - 3,864,064,048 instructions # 1.53 insn per cycle - 0.951839385 seconds time elapsed + 2,503,160,784 cycles # 2.822 GHz + 3,832,792,162 instructions # 1.53 insn per cycle + 0.943470239 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.412608e+00 Avg ME (F77/GPU) = 1.4132214346515752 Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -109,28 +109,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.456896e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.469931e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.469931e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.465694e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.479110e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.479110e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.695714 sec +TOTAL : 6.671662 sec INFO: No Floating Point Exceptions have been reported - 19,187,788,903 cycles # 2.865 GHz - 58,964,541,904 instructions # 3.07 insn per cycle - 6.700042902 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) + 19,108,337,453 cycles # 2.863 GHz + 58,967,331,894 instructions # 3.09 insn per cycle + 6.675976597 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412986e+00 Avg ME (F77/C++) = 1.4129858051842916 Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -138,28 +138,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.096292e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.240749e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.240749e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.093089e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.238027e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.238027e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.050191 sec +TOTAL : 2.051178 sec INFO: No Floating Point Exceptions have been reported - 5,878,600,751 cycles # 2.862 GHz - 16,741,770,713 instructions # 2.85 insn per cycle - 2.054466215 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) + 5,880,119,320 cycles # 2.862 GHz + 16,741,679,626 instructions # 2.85 insn per cycle + 2.055508197 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412987e+00 Avg ME (F77/C++) = 1.4129865669244737 Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -167,28 +167,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.635221e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.692353e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.692353e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.718905e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.782305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.782305e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 1.028531 sec +TOTAL : 0.980043 sec INFO: No Floating Point Exceptions have been reported - 2,753,199,998 cycles # 2.668 GHz - 6,017,134,022 instructions # 2.19 insn per cycle - 1.032815039 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) + 2,616,418,693 cycles # 2.660 GHz + 6,017,096,104 instructions # 2.30 insn per cycle + 0.984343134 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -196,28 +196,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.913612e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.992304e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.992304e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.912882e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.991175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.991175e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.882693 sec +TOTAL : 0.883189 sec INFO: No Floating Point Exceptions have been reported - 2,367,924,107 cycles # 2.672 GHz - 5,639,124,107 instructions # 2.38 insn per cycle - 0.886915877 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) + 2,365,822,002 cycles # 2.667 GHz + 5,638,771,692 instructions # 2.38 insn per cycle + 0.887626463 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -225,22 +225,22 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.395393e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.436951e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.436951e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.399129e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.441231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.441231e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.202996 sec +TOTAL : 1.200076 sec INFO: No Floating Point Exceptions have been reported - 2,083,621,174 cycles # 1.727 GHz - 3,374,799,861 instructions # 1.62 insn per cycle - 1.207355873 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) + 2,081,452,605 cycles # 1.729 GHz + 3,374,965,036 instructions # 1.62 insn per cycle + 1.204429196 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index f08ecd1d39..3d41e21b12 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_08:56:28 +DATE: 2024-05-16_14:38:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.567059e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.326056e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.411539e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.548366e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.290418e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.382374e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.451783 sec +TOTAL : 0.453301 sec INFO: No Floating Point Exceptions have been reported - 1,890,106,900 cycles # 2.839 GHz - 2,663,406,859 instructions # 1.41 insn per cycle - 0.724170807 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 + 1,884,361,235 cycles # 2.811 GHz + 2,662,129,036 instructions # 1.41 insn per cycle + 0.727401829 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 248 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.396627e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.418487e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.484929e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.381856e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.386346e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.451907e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.497663 sec +TOTAL : 0.498921 sec INFO: No Floating Point Exceptions have been reported - 2,058,055,387 cycles # 2.818 GHz - 3,003,578,703 instructions # 1.46 insn per cycle - 0.786932640 seconds time elapsed + 2,065,776,106 cycles # 2.820 GHz + 3,002,526,593 instructions # 1.45 insn per cycle + 0.789720140 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.412608e+00 Avg ME (F77/GPU) = 1.4132214346515752 Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.521953e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.535392e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.535392e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.479714e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.492704e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.492704e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.519150 sec +TOTAL : 6.641350 sec INFO: No Floating Point Exceptions have been reported - 18,988,818,598 cycles # 2.912 GHz - 58,701,795,910 instructions # 3.09 insn per cycle - 6.523314910 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) + 18,978,826,784 cycles # 2.861 GHz + 58,704,221,037 instructions # 3.09 insn per cycle + 6.645410970 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412986e+00 Avg ME (F77/C++) = 1.4129858051842916 Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.643168e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.800713e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.800713e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.494310e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.651898e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.651898e+04 ) sec^-1 
MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.916804 sec +TOTAL : 1.950028 sec INFO: No Floating Point Exceptions have been reported - 5,589,492,084 cycles # 2.911 GHz - 16,510,117,368 instructions # 2.95 insn per cycle - 1.921033755 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5551) (avx2: 0) (512y: 0) (512z: 0) + 5,589,974,968 cycles # 2.862 GHz + 16,510,304,699 instructions # 2.95 insn per cycle + 1.954264273 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5551) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412987e+00 Avg ME (F77/C++) = 1.4129865669244737 Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.535382e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.582428e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.582428e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.496639e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.543532e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.543532e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 1.088293 sec +TOTAL : 1.116418 sec INFO: No Floating Point Exceptions have been reported - 2,973,830,855 cycles # 2.724 GHz - 6,634,211,735 instructions # 2.23 insn per cycle - 1.092429717 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5568) (512y: 0) (512z: 0) + 2,975,820,242 cycles # 2.657 GHz + 6,633,799,194 instructions # 2.23 insn per cycle + 1.120575232 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5568) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.654844e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.711079e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.711079e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.615016e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.669374e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.669374e+05 ) sec^-1 
MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 1.011276 sec +TOTAL : 1.036246 sec INFO: No Floating Point Exceptions have been reported - 2,757,667,641 cycles # 2.717 GHz - 6,254,951,975 instructions # 2.27 insn per cycle - 1.015690621 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5279) (512y: 25) (512z: 0) + 2,759,204,529 cycles # 2.654 GHz + 6,255,102,481 instructions # 2.27 insn per cycle + 1.040401186 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5279) (512y: 25) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.327836e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.363787e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.363787e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.286831e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322123e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322123e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.257441 sec +TOTAL : 1.300128 sec INFO: No Floating Point Exceptions have been reported - 2,228,619,894 cycles # 1.768 GHz - 3,698,392,498 instructions # 1.66 insn per cycle - 1.261580008 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 29) (512z: 3970) + 2,231,395,652 cycles # 1.715 GHz + 3,699,704,768 instructions # 1.66 insn per cycle + 1.304305216 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2391) (512y: 29) (512z: 3970) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 8a353a8e39..18990368c8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_08:56:49 +DATE: 2024-05-16_14:38:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.931968e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.043555e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.056151e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.980776e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.047318e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059891e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.467354 sec +TOTAL : 0.469526 sec INFO: No Floating Point Exceptions have been reported - 1,981,280,626 cycles # 2.860 GHz - 2,847,085,390 instructions # 1.44 insn per cycle - 0.749733800 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 + 1,950,532,568 cycles # 2.815 GHz + 2,802,706,395 instructions # 1.44 insn per cycle + 0.749158155 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.119168e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.316295e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.327683e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.120585e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.317479e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329114e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.603396 sec +TOTAL : 0.606344 sec INFO: No Floating Point Exceptions have been reported - 2,399,824,573 cycles # 2.857 GHz - 3,685,028,033 instructions # 1.54 insn per cycle - 0.898525901 seconds time elapsed + 2,403,151,636 cycles # 2.824 GHz + 3,669,339,361 instructions # 1.53 insn per cycle + 0.910110717 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213755569487 Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.390547e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.402443e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.402443e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.348054e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.359694e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359694e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.877879 sec +TOTAL : 7.005029 sec INFO: No Floating Point Exceptions have been reported - 20,033,174,405 cycles # 2.911 GHz - 60,534,325,532 instructions # 3.02 insn per cycle - 6.882069019 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) + 20,055,951,018 cycles # 2.863 GHz + 60,536,467,053 instructions # 3.02 insn per cycle + 7.009312607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.728631e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.774861e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.774861e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.638770e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.684822e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.684822e+04 ) sec^-1 
MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.488457 sec +TOTAL : 3.556206 sec INFO: No Floating Point Exceptions have been reported - 10,181,292,152 cycles # 2.916 GHz - 30,384,307,915 instructions # 2.98 insn per cycle - 3.492671905 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5280) (avx2: 0) (512y: 0) (512z: 0) + 10,186,602,629 cycles # 2.862 GHz + 30,386,009,701 instructions # 2.98 insn per cycle + 3.560429335 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5280) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.201354e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.375070e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.375070e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.050822e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.223334e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.223334e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.803934 sec +TOTAL : 1.833811 sec INFO: No Floating Point Exceptions have been reported - 4,869,150,810 cycles # 2.694 GHz - 10,978,562,560 instructions # 2.25 insn per cycle - 1.808136290 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4624) (512y: 0) (512z: 0) + 4,877,548,863 cycles # 2.655 GHz + 10,978,535,397 instructions # 2.25 insn per cycle + 1.838126466 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4624) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.054680e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.077442e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.077442e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.034701e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.056812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056812e+05 ) sec^-1 
MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.576690 sec +TOTAL : 1.607005 sec INFO: No Floating Point Exceptions have been reported - 4,287,902,457 cycles # 2.713 GHz - 10,247,451,592 instructions # 2.39 insn per cycle - 1.581011258 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4280) (512y: 82) (512z: 0) + 4,285,859,041 cycles # 2.661 GHz + 10,248,085,853 instructions # 2.39 insn per cycle + 1.611327735 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4280) (512y: 82) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.877704e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.973161e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.973161e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.675038e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.769490e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.769490e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.407671 sec +TOTAL : 2.480681 sec INFO: No Floating Point Exceptions have been reported - 4,207,339,558 cycles # 1.745 GHz - 6,043,036,802 instructions # 1.44 insn per cycle - 2.411911339 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2066) (512y: 117) (512z: 3540) + 4,211,204,679 cycles # 1.695 GHz + 6,044,041,090 instructions # 1.44 insn per cycle + 2.485018889 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2066) (512y: 117) (512z: 3540) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index e88f85bc0c..bea6b18082 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-15_08:57:15 +DATE: 2024-05-16_14:39:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.932348e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.042306e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.055103e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.940348e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.041869e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.054764e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.466469 sec +TOTAL : 0.469154 sec INFO: No Floating Point Exceptions have been reported - 1,971,792,438 cycles # 2.854 GHz - 2,835,098,064 instructions # 1.44 insn per cycle - 0.747519363 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 + 1,946,414,728 cycles # 2.818 GHz + 2,803,423,086 instructions # 1.44 insn per cycle + 0.748059256 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.116570e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.312584e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.323780e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.116866e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.312173e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.323463e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.596823 sec +TOTAL : 0.604157 sec INFO: No Floating Point Exceptions have been reported - 2,383,594,932 cycles # 2.859 GHz - 3,684,187,301 instructions # 1.55 insn per cycle - 0.891590073 seconds time elapsed + 2,374,249,289 cycles # 2.818 GHz + 3,602,148,119 instructions # 1.52 insn per cycle + 0.902621411 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213755569487 Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.413781e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.425854e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.425854e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.368504e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.380280e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.380280e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.811710 sec +TOTAL : 6.941323 sec INFO: No Floating Point Exceptions have been reported - 19,857,309,753 cycles # 2.914 GHz - 59,935,036,773 instructions # 3.02 insn per cycle - 6.815878048 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) + 19,878,296,626 cycles # 2.863 GHz + 59,936,362,271 instructions # 3.02 insn per cycle + 6.945573140 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.758386e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.805205e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.805205e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.689994e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.736297e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.736297e+04 ) sec^-1 
MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.466198 sec +TOTAL : 3.516340 sec INFO: No Floating Point Exceptions have been reported - 10,083,849,462 cycles # 2.907 GHz - 30,099,290,738 instructions # 2.98 insn per cycle - 3.470474717 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5082) (avx2: 0) (512y: 0) (512z: 0) + 10,077,314,757 cycles # 2.863 GHz + 30,098,117,657 instructions # 2.99 insn per cycle + 3.520635536 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5082) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.978413e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.141940e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.141940e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.778247e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.940877e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.940877e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.847805 sec +TOTAL : 1.889938 sec INFO: No Floating Point Exceptions have been reported - 5,017,288,589 cycles # 2.710 GHz - 11,482,665,006 instructions # 2.29 insn per cycle - 1.852007665 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4723) (512y: 0) (512z: 0) + 5,023,754,472 cycles # 2.654 GHz + 11,483,522,538 instructions # 2.29 insn per cycle + 1.894205310 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4723) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.808178e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.000241e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.000241e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.644687e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.842226e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.842226e+04 ) sec^-1 
MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.694132 sec +TOTAL : 1.722080 sec INFO: No Floating Point Exceptions have been reported - 4,593,132,694 cycles # 2.706 GHz - 10,809,915,136 instructions # 2.35 insn per cycle - 1.698316035 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4285) (512y: 234) (512z: 0) + 4,590,091,342 cycles # 2.660 GHz + 10,809,457,257 instructions # 2.35 insn per cycle + 1.726406566 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4285) (512y: 234) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.855597e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.950807e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.950807e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.641517e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.735645e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.735645e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.415375 sec +TOTAL : 2.492729 sec INFO: No Floating Point Exceptions have been reported - 4,225,925,982 cycles # 1.747 GHz - 6,273,431,165 instructions # 1.48 insn per cycle - 2.419573682 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1961) (512y: 163) (512z: 3617) + 4,229,101,372 cycles # 1.695 GHz + 6,273,394,761 instructions # 1.48 insn per cycle + 2.496999493 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1961) (512y: 163) (512z: 3617) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 35f4b07c31..adf6424639 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_08:57:40 +DATE: 2024-05-16_14:39:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.453896e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.476957e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.479157e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.453895e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.477096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.479397e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.528972 sec +TOTAL : 0.529744 sec INFO: No Floating Point Exceptions have been reported - 2,207,774,651 cycles # 2.859 GHz - 3,437,899,945 instructions # 1.56 insn per cycle - 0.831248833 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 + 2,179,317,048 cycles # 2.822 GHz + 3,403,036,461 instructions # 1.56 insn per cycle + 0.830470867 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.141343e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.168816e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.169999e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.124157e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.151338e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.152519e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.026793 sec +TOTAL : 3.042150 sec INFO: No Floating Point Exceptions have been reported - 9,518,294,264 cycles # 2.901 GHz - 21,429,637,627 instructions # 2.25 insn per cycle - 3.336351765 seconds time elapsed + 9,405,604,432 cycles # 2.853 GHz + 20,118,562,201 instructions # 2.14 insn per cycle + 3.353608047 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.869535e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.870457e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.870457e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.820592e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.821434e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821434e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.782045 sec +TOTAL : 9.018372 sec INFO: No Floating Point Exceptions have been reported - 25,590,278,249 cycles # 2.913 GHz - 78,937,068,953 instructions # 3.08 insn per cycle - 8.786204437 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 25,614,013,948 cycles # 2.839 GHz + 78,938,013,495 instructions # 3.08 insn per cycle + 9.022664733 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.594981e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.598272e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.598272e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.519494e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.522699e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.522699e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.571469 sec +TOTAL : 4.669138 sec INFO: No Floating Point Exceptions have been reported - 12,870,732,355 cycles # 2.813 GHz - 39,279,748,127 instructions # 3.05 insn per cycle - 4.575784377 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) + 12,898,966,245 cycles # 2.761 GHz + 39,280,150,365 instructions # 3.05 insn per cycle + 4.673492352 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.041516e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.057538e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.057538e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.859599e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.875346e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.875346e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.048843 sec +TOTAL : 2.097013 sec INFO: No Floating Point Exceptions have been reported - 5,574,639,516 cycles # 2.716 GHz - 13,685,609,494 instructions # 2.45 insn per cycle - 2.053053177 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) + 5,574,685,577 cycles # 2.655 GHz + 13,685,856,406 instructions # 2.46 insn per cycle + 2.101249976 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.168356e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.189435e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.189435e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.915800e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.935807e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.935807e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.798126 sec +TOTAL : 1.848754 sec INFO: No Floating Point Exceptions have been reported - 4,886,886,725 cycles # 2.713 GHz - 12,340,823,521 instructions # 2.53 insn per cycle - 1.802269756 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) + 4,887,101,603 cycles # 2.639 GHz + 12,341,123,817 instructions # 2.53 insn per cycle + 1.853060894 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.912225e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.924556e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.924556e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.728417e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.739729e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.739729e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.382421 sec +TOTAL : 2.446881 sec INFO: No Floating Point Exceptions have been reported - 4,113,294,297 cycles # 1.724 GHz - 6,335,943,255 instructions # 1.54 insn per cycle - 2.386734974 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) + 4,107,098,137 cycles # 1.676 GHz + 6,336,202,498 instructions # 1.54 insn per cycle + 2.451096147 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index fb56deab3c..92636e2555 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:20:40 +DATE: 2024-05-16_15:02:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.120517e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.462866e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.462866e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.094987e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.434034e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.434034e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.521719 sec +TOTAL : 0.523594 sec INFO: No Floating Point Exceptions have been reported - 2,119,788,652 cycles # 2.819 GHz - 3,351,368,851 instructions # 1.58 insn per cycle - 0.812195705 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge + 2,118,517,608 cycles # 2.813 GHz + 3,348,276,596 instructions # 1.58 insn per cycle + 0.813391390 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -70,7 +70,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -79,29 +79,29 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.618604e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.118495e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.118495e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.622834e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.121853e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.121853e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.329148 sec +TOTAL : 3.329713 sec INFO: No Floating Point Exceptions have been reported - 10,272,634,162 cycles # 2.852 GHz - 22,078,504,888 instructions # 2.15 insn per cycle - 3.660227253 seconds time elapsed + 10,291,111,145 cycles # 2.854 GHz + 21,714,903,322 instructions # 2.11 insn per cycle + 3.660758937 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -109,28 +109,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.837905e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.838776e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.838776e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.836126e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.837051e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.837051e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.937469 sec +TOTAL : 8.946025 sec INFO: No Floating Point Exceptions have been reported - 25,594,477,998 cycles # 2.863 GHz - 78,950,199,042 instructions # 3.08 insn per cycle - 8.941935349 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 25,625,027,072 cycles # 2.863 GHz + 78,943,584,564 instructions # 3.08 insn per cycle + 8.950491990 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -138,28 +138,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.503068e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.506337e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.506337e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.512313e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.515690e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.515690e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.695365 sec +TOTAL : 4.682891 sec INFO: No Floating Point Exceptions have been reported - 12,898,085,999 cycles # 2.746 GHz - 39,295,739,926 instructions # 3.05 insn per cycle - 4.700102170 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) + 12,903,818,271 cycles # 2.754 GHz + 39,293,324,950 instructions # 3.05 insn per cycle + 4.687529036 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -167,28 +167,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.875978e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.891827e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.891827e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.867831e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.884189e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.884189e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.096243 sec +TOTAL : 2.098804 sec INFO: No Floating Point Exceptions have been reported - 5,586,723,161 cycles # 2.661 GHz - 13,697,374,494 instructions # 2.45 insn per cycle - 2.100722433 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) + 5,587,651,201 cycles # 2.658 GHz + 13,696,262,775 instructions # 2.45 insn per cycle + 2.103410758 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -196,28 +196,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.982842e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.004357e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.004357e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.952196e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.973818e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.973818e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.839387 sec +TOTAL : 1.845912 sec INFO: No Floating Point Exceptions have been reported - 4,902,530,960 cycles # 2.660 GHz - 12,352,684,842 instructions # 2.52 insn per cycle - 1.844032539 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) + 4,903,860,646 cycles # 2.651 GHz + 12,352,108,328 instructions # 2.52 insn per cycle + 1.850421022 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -225,22 +225,22 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.719703e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.732034e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.732034e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.711524e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.723541e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.723541e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.454573 sec +TOTAL : 2.457485 sec INFO: No Floating Point Exceptions have been reported - 4,128,287,661 cycles # 1.680 GHz - 6,347,060,627 instructions # 1.54 insn per cycle - 2.459158049 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) + 4,130,677,154 cycles # 1.678 GHz + 6,346,127,118 instructions # 1.54 insn per cycle + 2.462055019 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 0d1c3bb946..07bc3b6c73 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:30:29 +DATE: 2024-05-16_15:12:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.485197e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.513349e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.516105e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.490501e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.518177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.520849e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.515285 sec +TOTAL : 0.515008 sec INFO: No Floating Point Exceptions have been reported - 2,091,548,257 cycles # 2.809 GHz - 3,228,803,869 instructions # 1.54 insn per cycle - 0.805824094 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common + 2,117,861,647 cycles # 2.847 GHz + 3,355,581,223 instructions # 1.58 insn per cycle + 0.805282012 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 
256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.150668e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.184268e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.185674e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.120060e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.152876e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154244e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.132364 sec +TOTAL : 3.146209 sec INFO: No Floating Point Exceptions have been reported - 9,662,847,578 cycles # 2.852 GHz - 21,406,248,753 instructions # 2.22 insn per cycle - 3.445338887 seconds time elapsed + 9,794,350,225 cycles # 2.878 GHz + 20,567,996,876 instructions # 2.10 insn per cycle + 3.458179285 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.837499e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.838361e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.838361e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.854249e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.855163e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.855163e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.936739 sec +TOTAL : 8.856220 sec INFO: No Floating Point Exceptions have been reported - 25,597,377,366 cycles # 2.864 GHz - 78,941,252,238 instructions # 3.08 insn per cycle - 8.940922788 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 25,606,958,110 cycles # 2.890 GHz + 78,936,876,492 instructions # 3.08 insn per cycle + 8.860490718 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.515333e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.518537e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.518537e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.547585e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.550823e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.550823e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.676416 sec +TOTAL : 4.633866 sec INFO: No Floating Point Exceptions have been reported - 12,904,346,221 cycles # 2.758 GHz - 39,280,145,215 instructions # 3.04 insn per cycle - 4.680551685 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) + 12,886,616,952 cycles # 2.779 GHz + 39,279,548,039 instructions # 3.05 insn per cycle + 4.638052623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.866398e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.881997e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.881997e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.950793e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.966539e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.966539e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.096164 sec +TOTAL : 2.073967 sec INFO: No Floating Point Exceptions have been reported - 5,577,736,611 cycles # 2.657 GHz - 13,686,218,579 instructions # 2.45 insn per cycle - 2.100327168 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) + 5,577,712,569 cycles # 2.685 GHz + 13,684,498,611 instructions # 2.45 insn per cycle + 2.078154877 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.968356e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.988747e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.988747e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.068596e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.089664e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
9.089664e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.839965 sec +TOTAL : 1.820316 sec INFO: No Floating Point Exceptions have been reported - 4,891,342,706 cycles # 2.654 GHz - 12,339,210,101 instructions # 2.52 insn per cycle - 1.844117982 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) + 4,894,997,970 cycles # 2.684 GHz + 12,339,079,686 instructions # 2.52 insn per cycle + 1.824557454 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.718515e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.730467e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.730467e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.817590e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.829323e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.829323e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.452431 sec +TOTAL : 2.417091 sec INFO: No Floating Point Exceptions have been reported - 4,114,726,339 cycles # 1.676 GHz - 6,333,274,004 instructions # 1.54 insn per cycle - 2.456739984 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) + 4,131,104,953 cycles # 1.707 GHz + 6,332,486,091 instructions # 1.53 insn per cycle + 2.421265188 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index f82b55abd2..0a65f9fefe 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:27:41 +DATE: 2024-05-16_15:09:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.458587e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.486337e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.488763e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.458490e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.485387e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.487802e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.514727 sec +TOTAL : 0.511372 sec INFO: No Floating Point Exceptions have been reported - 2,093,578,737 cycles # 2.818 GHz - 3,271,199,664 instructions # 1.56 insn per cycle - 0.804252228 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst + 2,125,154,213 cycles # 2.818 GHz + 3,305,948,128 instructions # 1.56 insn per cycle + 0.811831996 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 
256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.162956e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.196994e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.198404e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.112497e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.145168e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.146541e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.074292 sec +TOTAL : 3.091906 sec INFO: No Floating Point Exceptions have been reported - 9,539,416,967 cycles # 2.854 GHz - 21,898,452,850 instructions # 2.30 insn per cycle - 3.398658909 seconds time elapsed + 9,555,297,501 cycles # 2.852 GHz + 20,467,928,496 instructions # 2.14 insn per cycle + 3.408325542 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.836687e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.837552e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.837552e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.835837e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.836698e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.836698e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.938893 sec +TOTAL : 8.943298 sec INFO: No Floating Point Exceptions have been reported - 25,599,669,460 cycles # 2.863 GHz - 78,937,757,667 instructions # 3.08 insn per cycle - 8.943169699 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 25,616,203,937 cycles # 2.864 GHz + 78,941,981,933 instructions # 3.08 insn per cycle + 8.947377666 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.528823e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.532118e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.532118e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.509219e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.512397e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.512397e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.656663 sec +TOTAL : 4.682859 sec INFO: No Floating Point Exceptions have been reported - 12,879,833,669 cycles # 2.764 GHz - 39,279,836,544 instructions # 3.05 insn per cycle - 4.660830472 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) + 12,889,261,061 cycles # 2.751 GHz + 39,280,374,746 instructions # 3.05 insn per cycle + 4.687212544 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.856358e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.871880e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.871880e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.873155e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.889044e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.889044e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.097129 sec +TOTAL : 2.092539 sec INFO: No Floating Point Exceptions have been reported - 5,576,613,834 cycles # 2.655 GHz - 13,686,586,203 instructions # 2.45 insn per cycle - 2.101309122 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) + 5,573,290,015 cycles # 2.659 GHz + 13,685,575,452 instructions # 2.46 insn per cycle + 2.096738730 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.966580e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.986724e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.986724e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.869803e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.890096e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
8.890096e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.838635 sec +TOTAL : 1.858817 sec INFO: No Floating Point Exceptions have been reported - 4,888,562,247 cycles # 2.654 GHz - 12,341,706,398 instructions # 2.52 insn per cycle - 1.842925870 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) + 4,890,594,740 cycles # 2.626 GHz + 12,341,872,390 instructions # 2.52 insn per cycle + 1.863321950 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.714955e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.726732e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.726732e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.721792e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.733746e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.733746e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.451970 sec +TOTAL : 2.449544 sec INFO: No Floating Point Exceptions have been reported - 4,108,647,143 cycles # 1.673 GHz - 6,335,478,400 instructions # 1.54 insn per cycle - 2.456208702 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) + 4,111,968,902 cycles # 1.676 GHz + 6,335,563,564 instructions # 1.54 insn per cycle + 2.453951471 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index f055e3f9ce..b300efd9c0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -40,193 +40,193 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:24:58 +DATE: 2024-05-16_15:07:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.174424e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.487943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.490424e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.175456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.487401e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489887e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.518038 sec +TOTAL : 0.516778 sec INFO: No Floating Point Exceptions have been reported - 2,104,506,630 cycles # 2.820 GHz - 3,337,647,884 instructions # 1.59 insn per cycle - 0.807730228 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst + 2,106,838,284 cycles # 2.817 GHz + 3,334,047,065 instructions # 1.58 insn per cycle + 0.806903831 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.697139e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.147776e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.149144e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.725415e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.181222e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.182613e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.231241 sec +TOTAL : 3.208931 sec INFO: No Floating Point Exceptions have been reported - 9,935,469,738 cycles # 2.849 GHz - 22,841,719,102 instructions # 2.30 insn per cycle - 3.545516982 seconds time elapsed + 9,884,616,856 cycles # 2.852 GHz + 22,569,706,597 instructions # 2.28 insn per cycle + 3.521271497 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.836224e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.837114e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.837114e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.838805e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.839662e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.839662e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.941157 sec +TOTAL : 8.928794 sec INFO: No Floating Point Exceptions have been reported - 25,607,708,752 cycles # 2.863 GHz - 78,941,589,821 instructions # 3.08 insn per cycle - 8.945374108 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 25,578,535,475 cycles # 2.864 GHz + 78,941,438,017 instructions # 3.09 insn per cycle + 8.932959256 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.474198e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.477295e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.477295e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.490750e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.493870e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.493870e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.729705 sec +TOTAL : 4.708141 sec INFO: No Floating Point Exceptions have been reported - 12,895,914,246 cycles # 2.725 GHz - 39,280,577,356 instructions # 3.05 insn per cycle - 4.733927981 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) + 12,873,433,154 cycles # 2.733 GHz + 39,280,620,994 instructions # 3.05 insn per cycle + 4.712353785 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.859092e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.874494e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.874494e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.853620e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.869024e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.869024e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.096348 sec +TOTAL : 2.097850 sec INFO: No Floating Point Exceptions have been reported - 5,572,573,379 cycles # 2.654 GHz - 13,685,971,434 instructions # 2.46 insn per cycle - 2.100551652 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) + 5,573,477,429 cycles # 2.652 GHz + 13,685,909,410 instructions # 2.46 insn per cycle + 2.102047066 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.971088e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.991417e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.991417e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.970623e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.991129e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
8.991129e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.838609 sec +TOTAL : 1.837793 sec INFO: No Floating Point Exceptions have been reported - 4,886,906,640 cycles # 2.654 GHz - 12,340,946,515 instructions # 2.53 insn per cycle - 1.842847914 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) + 4,885,535,539 cycles # 2.653 GHz + 12,340,762,979 instructions # 2.53 insn per cycle + 1.841998870 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.727851e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.739515e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.739515e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.715803e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.727367e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.727367e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.447641 sec +TOTAL : 2.451983 sec INFO: No Floating Point Exceptions have been reported - 4,105,054,154 cycles # 1.675 GHz - 6,334,788,892 instructions # 1.54 insn per cycle - 2.451834789 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) + 4,110,713,398 cycles # 1.674 GHz + 6,334,867,690 instructions # 1.54 insn per cycle + 2.456147392 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 596c0e139f..254c65fd8c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_08:58:13 +DATE: 2024-05-16_14:40:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.479340e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.502998e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.505330e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.472040e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.495257e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497568e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.530199 sec +TOTAL : 0.530193 sec INFO: No Floating Point Exceptions have been reported - 2,205,377,507 cycles # 2.861 GHz - 3,376,536,792 instructions # 1.53 insn per cycle - 0.831148875 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 + 2,179,825,483 cycles # 2.820 GHz + 3,416,926,116 instructions # 1.57 insn per cycle + 0.832303660 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.151750e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.179384e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.180577e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.149957e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.177471e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.178689e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.017740 sec +TOTAL : 3.025570 sec INFO: No Floating Point Exceptions have been reported - 9,499,607,795 cycles # 2.896 GHz - 21,512,832,515 instructions # 2.26 insn per cycle - 3.335785909 seconds time elapsed + 9,343,829,120 cycles # 2.851 GHz + 20,017,847,921 instructions # 2.14 insn per cycle + 3.337093329 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.875938e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.876826e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.876826e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.844549e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.845438e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.845438e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.751856 sec +TOTAL : 8.900941 sec INFO: No Floating Point Exceptions have been reported - 25,512,720,979 cycles # 2.914 GHz - 78,709,548,508 instructions # 3.09 insn per cycle - 8.755998767 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4264) (avx2: 0) (512y: 0) (512z: 0) + 25,492,945,375 cycles # 2.863 GHz + 78,715,017,784 instructions # 3.09 insn per cycle + 8.905151100 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4264) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.496486e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.499607e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.499607e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.432714e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.435728e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.435728e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.699409 sec +TOTAL : 4.786568 sec INFO: No Floating Point Exceptions have been reported - 12,966,578,114 cycles # 2.758 GHz - 39,226,351,463 instructions # 3.03 insn per cycle - 4.703633821 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12951) (avx2: 0) (512y: 0) (512z: 0) + 12,968,671,480 cycles # 2.709 GHz + 39,227,279,421 instructions # 3.02 insn per cycle + 4.790848376 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12951) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.959665e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.975475e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.975475e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.791500e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.806568e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.806568e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.069543 sec +TOTAL : 2.114108 sec INFO: No Floating Point Exceptions have been reported - 5,619,385,674 cycles # 2.711 GHz - 13,800,122,512 instructions # 2.46 insn per cycle - 2.073748619 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) + 5,617,875,214 cycles # 2.653 GHz + 13,801,216,605 instructions # 2.46 insn per cycle + 2.118326582 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.998297e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.018203e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.018203e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.808696e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.827867e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.827867e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.831905 sec +TOTAL : 1.871093 sec INFO: No Floating Point Exceptions have been reported - 4,976,755,667 cycles # 2.711 GHz - 12,465,998,942 instructions # 2.50 insn per cycle - 1.836219257 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) + 4,977,184,975 cycles # 2.656 GHz + 12,467,160,434 instructions # 2.50 insn per cycle + 1.875328468 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.913751e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.925860e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.925860e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.708154e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.719459e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.719459e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.381526 sec +TOTAL : 2.454314 sec INFO: No Floating Point Exceptions have been reported - 4,118,252,785 cycles # 1.727 GHz - 6,458,362,100 instructions # 1.57 insn per cycle - 2.385687443 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) + 4,118,637,907 cycles # 1.676 GHz + 6,458,862,875 instructions # 1.57 insn per cycle + 2.458530246 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 4d6259fe05..452f4e853d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:11:14 +DATE: 2024-05-16_14:53:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.244117e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.269243e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.271350e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.253411e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.278108e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.280152e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.537946 sec +TOTAL : 0.538797 sec INFO: No Floating Point Exceptions have been reported - 2,167,156,159 cycles # 2.821 GHz - 3,392,966,659 instructions # 1.57 insn per cycle - 0.824971117 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/gcheck.exe -p 64 256 1 + 2,198,780,840 cycles # 2.857 GHz + 3,392,092,682 instructions # 1.54 insn per cycle + 0.826434194 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.760050e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.786712e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.787882e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.756018e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.782691e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783822e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.315956 sec +TOTAL : 3.316768 sec INFO: No Floating Point Exceptions have been reported - 10,224,332,396 cycles # 2.854 GHz - 23,077,972,962 instructions # 2.26 insn per cycle - 3.638688936 seconds time elapsed + 10,315,360,608 cycles # 2.881 GHz + 23,624,745,879 instructions # 2.29 insn per cycle + 3.638219909 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158122E-004 Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.122268e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.122707e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.122707e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.179521e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.179964e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.179964e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 39.794899 sec +TOTAL : 39.248920 sec INFO: No Floating Point Exceptions have been reported - 113,687,314,934 cycles # 2.857 GHz - 144,823,823,271 instructions # 1.27 insn per cycle - 39.799159131 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:21353) (avx2: 0) (512y: 0) (512z: 0) + 113,511,319,041 cycles # 2.892 GHz + 144,820,446,927 instructions # 1.28 insn per cycle + 39.253177511 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21353) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140450E-004 Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.013716e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.016144e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.016144e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.047626e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.050057e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.050057e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.450973 sec +TOTAL : 5.390362 sec INFO: No Floating Point Exceptions have been reported - 14,763,659,201 cycles # 2.707 GHz - 37,576,144,793 instructions # 2.55 insn per cycle - 5.455412399 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68119) (avx2: 0) (512y: 0) (512z: 0) + 14,740,564,650 cycles # 2.733 GHz + 37,575,494,329 instructions # 2.55 insn per cycle + 5.394647902 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.154145e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.167309e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.167309e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.230737e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.243892e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.243892e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.301876 sec +TOTAL : 2.278344 sec INFO: No Floating Point Exceptions have been reported - 6,125,614,448 cycles # 2.657 GHz - 13,062,162,541 instructions # 2.13 insn per cycle - 2.306100470 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) + 6,134,003,628 cycles # 2.689 GHz + 13,061,930,844 instructions # 2.13 insn per cycle + 2.282738143 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.664032e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.683149e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.683149e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.779670e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.799133e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.799133e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.902577 sec +TOTAL : 1.877722 sec INFO: No Floating Point Exceptions have been reported - 5,063,734,571 cycles # 2.656 GHz - 11,440,604,174 instructions # 2.26 insn per cycle - 1.906973772 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) + 5,068,047,565 cycles # 2.694 GHz + 11,440,450,267 instructions # 2.26 insn per cycle + 1.882139324 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.952458e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.964708e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.964708e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.093705e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.106755e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.106755e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.368892 sec +TOTAL : 2.321771 sec INFO: No Floating Point Exceptions have been reported - 3,971,908,402 cycles # 1.674 GHz - 5,943,368,618 instructions # 1.50 insn per cycle - 2.373284376 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) + 3,974,444,581 cycles # 1.709 GHz + 5,942,873,144 instructions # 1.50 insn per cycle + 2.326156002 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index c5d3a4f2ff..00ea23e18d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:12:21 +DATE: 2024-05-16_14:54:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.275128e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.300396e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.302627e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.259147e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.284136e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.286360e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.537226 sec +TOTAL : 0.536782 sec INFO: No Floating Point Exceptions have been reported - 2,163,917,663 cycles # 2.821 GHz - 3,355,130,894 instructions # 1.55 insn per cycle - 0.824217982 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/gcheck.exe -p 64 256 1 + 2,193,506,190 cycles # 2.857 GHz + 3,337,314,407 instructions # 1.52 insn per cycle + 0.824492176 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.758771e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.785429e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.786568e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.761556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.788263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.789425e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.307138 sec +TOTAL : 3.301197 sec INFO: No Floating Point Exceptions have been reported - 10,162,507,375 cycles # 2.853 GHz - 22,962,240,536 instructions # 2.26 insn per cycle - 3.619532536 seconds time elapsed + 10,264,886,616 cycles # 2.886 GHz + 23,377,018,059 instructions # 2.28 insn per cycle + 3.615104997 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158122E-004 Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.129690e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.130140e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.130140e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.170908e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.171353e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.171353e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 39.722641 sec +TOTAL : 39.330349 sec INFO: No Floating Point Exceptions have been reported - 113,571,444,042 cycles # 2.859 GHz - 144,786,097,470 instructions # 1.27 insn per cycle - 39.726802831 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20719) (avx2: 0) (512y: 0) (512z: 0) + 113,688,017,774 cycles # 2.891 GHz + 144,788,018,158 instructions # 1.27 insn per cycle + 39.334720458 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20719) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.953753e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.955997e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.955997e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.974783e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.977013e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.977013e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.561657 sec +TOTAL : 5.522774 sec INFO: No Floating Point Exceptions have been reported - 15,215,969,839 cycles # 2.735 GHz - 37,766,601,033 instructions # 2.48 insn per cycle - 5.566026750 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68447) (avx2: 0) (512y: 0) (512z: 0) + 15,220,566,650 cycles # 2.755 GHz + 37,763,046,074 instructions # 2.48 insn per cycle + 5.527045303 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.301121e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.314990e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.314990e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.412795e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.426610e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.426610e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.255685 sec +TOTAL : 2.222010 sec INFO: No Floating Point Exceptions have been reported - 5,997,807,839 cycles # 2.655 GHz - 12,896,119,219 instructions # 2.15 insn per cycle - 2.260006581 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) + 6,000,419,836 cycles # 2.696 GHz + 12,896,174,142 instructions # 2.15 insn per cycle + 2.226315650 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.623672e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.642489e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.642489e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.743711e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.762861e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.762861e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.911380 sec +TOTAL : 1.885038 sec INFO: No Floating Point Exceptions have been reported - 5,087,031,928 cycles # 2.656 GHz - 11,446,947,598 instructions # 2.25 insn per cycle - 1.915663527 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) + 5,086,798,971 cycles # 2.694 GHz + 11,447,968,989 instructions # 2.25 insn per cycle + 1.889284279 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.996536e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.008825e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.008825e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.141072e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.153903e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.153903e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.353561 sec +TOTAL : 2.306249 sec INFO: No Floating Point Exceptions have been reported - 3,949,690,846 cycles # 1.676 GHz - 5,897,063,378 instructions # 1.49 insn per cycle - 2.357781802 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) + 3,947,559,408 cycles # 1.709 GHz + 5,896,754,674 instructions # 1.49 insn per cycle + 2.310527958 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index cbfc88379e..15bbe59069 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_08:58:46 +DATE: 2024-05-16_14:40:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.371311e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.416400e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.421725e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.326887e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.370559e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.376371e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.485367 sec +TOTAL : 0.487782 sec INFO: No Floating Point Exceptions have been reported - 2,002,413,256 cycles # 2.854 GHz - 2,969,576,839 instructions # 1.48 insn per cycle - 0.758471447 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 + 1,984,813,926 cycles # 2.807 GHz + 2,933,686,219 instructions # 1.48 insn per cycle + 0.764328783 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.620584e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.682469e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.685265e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.584549e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.644337e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.647136e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.723049 sec +TOTAL : 1.720884 sec INFO: No Floating Point Exceptions have been reported - 5,638,197,939 cycles # 2.887 GHz - 11,952,390,273 instructions # 2.12 insn per cycle - 2.011373445 seconds time elapsed + 5,560,374,951 cycles # 2.849 GHz + 11,900,809,748 instructions # 2.14 insn per cycle + 2.008088048 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262659968156085E-004 Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.939700e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.940642e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.940642e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.909633e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910547e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910547e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.463553 sec +TOTAL : 8.596251 sec INFO: No Floating Point Exceptions have been reported - 24,602,533,365 cycles # 2.906 GHz - 78,128,390,385 instructions # 3.18 insn per cycle - 8.467657194 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 24,624,004,022 cycles # 2.864 GHz + 78,129,381,217 instructions # 3.17 insn per cycle + 8.600293639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.990292e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.003122e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.003122e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.891953e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.904635e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.904635e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.354252 sec +TOTAL : 2.387612 sec INFO: No Floating Point Exceptions have been reported - 6,470,915,919 cycles # 2.744 GHz - 20,120,315,745 instructions # 3.11 insn per cycle - 2.358972692 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) + 6,469,659,104 cycles # 2.706 GHz + 20,120,611,338 instructions # 3.11 insn per cycle + 2.391816623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.598984e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.605530e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.605530e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.562010e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.568248e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.568248e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.034095 sec +TOTAL : 1.059004 sec INFO: No Floating Point Exceptions have been reported - 2,819,923,035 cycles # 2.718 GHz - 6,988,048,681 instructions # 2.48 insn per cycle - 1.038318254 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) + 2,818,181,262 cycles # 2.654 GHz + 6,988,460,270 instructions # 2.48 insn per cycle + 1.063195979 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.815329e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.823687e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.823687e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.763183e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.771185e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.771185e+04 ) sec^-1 
MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.911531 sec +TOTAL : 0.938662 sec INFO: No Floating Point Exceptions have been reported - 2,491,445,921 cycles # 2.723 GHz - 6,295,438,291 instructions # 2.53 insn per cycle - 0.915618567 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) + 2,488,393,509 cycles # 2.641 GHz + 6,295,244,635 instructions # 2.53 insn per cycle + 0.942828770 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.400172e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.405292e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.405292e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.363218e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.368048e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.368048e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.179498 sec +TOTAL : 1.211358 sec INFO: No Floating Point Exceptions have been reported - 2,046,427,111 cycles # 1.730 GHz - 3,266,025,313 instructions # 1.60 insn per cycle - 1.183581210 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) + 2,044,658,355 cycles # 1.683 GHz + 3,265,998,063 instructions # 1.60 insn per cycle + 1.215542758 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index ed8784593a..e281ad389f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:21:13 +DATE: 2024-05-16_15:03:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.592056e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.298774e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.298774e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.615502e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.322427e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.322427e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.475781 sec +TOTAL : 0.477115 sec INFO: No Floating Point Exceptions have been reported - 1,939,527,028 cycles # 2.815 GHz - 2,902,437,962 instructions # 1.50 insn per cycle - 0.747004510 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge + 1,936,349,619 cycles # 2.809 GHz + 2,877,179,431 instructions # 1.49 insn per cycle + 0.747561501 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -70,7 +70,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -79,29 +79,29 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.214912e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.514571e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.514571e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.243623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.556013e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.556013e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.907786 sec +TOTAL : 1.907816 sec INFO: No Floating Point Exceptions have been reported - 6,122,056,386 cycles # 2.846 GHz - 12,557,649,687 instructions # 2.05 insn per cycle - 2.207056622 seconds time elapsed + 6,131,638,198 cycles # 2.845 GHz + 12,981,768,605 instructions # 2.12 insn per cycle + 2.213144159 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262659968156085E-004 Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -109,28 +109,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.906950e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.907876e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.907876e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.909165e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910120e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910120e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.611285 sec +TOTAL : 8.600727 sec INFO: No Floating Point Exceptions have been reported - 24,664,256,092 cycles # 2.863 GHz - 78,132,918,841 instructions # 3.17 insn per cycle - 8.615492497 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 24,637,778,479 cycles # 2.864 GHz + 78,132,610,249 instructions # 3.17 insn per cycle + 8.604942209 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -138,28 +138,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.867477e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.879890e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.879890e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.457452e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.468775e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.468775e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.399085 sec +TOTAL : 2.550311 sec INFO: No Floating Point Exceptions have been reported - 6,481,045,907 cycles # 2.698 GHz - 20,129,947,819 instructions # 3.11 insn per cycle - 2.403366026 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) + 6,935,275,139 cycles # 2.716 GHz + 20,130,100,658 instructions # 2.90 insn per cycle + 2.554710358 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -167,28 +167,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.563122e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.569724e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.569724e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.550561e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.557160e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.557160e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.060074 sec +TOTAL : 1.068476 sec INFO: No Floating Point Exceptions have been reported - 2,825,592,732 cycles # 2.656 GHz - 6,997,999,144 instructions # 2.48 insn per cycle - 1.064449020 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) + 2,830,711,742 cycles # 2.640 GHz + 6,997,830,070 instructions # 2.47 insn per cycle + 1.072903816 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -196,28 +196,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.771659e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.779871e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.779871e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.772339e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.780808e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.780808e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.936726 sec +TOTAL : 0.936387 sec INFO: No Floating Point Exceptions have been reported - 2,499,492,257 cycles # 2.658 GHz - 6,305,238,570 instructions # 2.52 insn per cycle - 0.941141246 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) + 2,497,824,247 cycles # 2.658 GHz + 6,305,168,616 instructions # 2.52 insn per cycle + 0.940674173 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -225,22 +225,22 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.360130e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.365020e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.365020e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.362852e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.367803e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.367803e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.216987 sec +TOTAL : 1.214531 sec INFO: No Floating Point Exceptions have been reported - 2,054,846,027 cycles # 1.684 GHz - 3,277,226,537 instructions # 1.59 insn per cycle - 1.221302296 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) + 2,054,265,568 cycles # 1.686 GHz + 3,276,400,100 instructions # 1.59 insn per cycle + 1.218830996 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 3255b1deef..1c3846a692 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:31:02 +DATE: 2024-05-16_15:13:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.323506e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.373675e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.379424e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.362325e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.415082e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.420724e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.472243 sec +TOTAL : 0.471632 sec INFO: No Floating Point Exceptions have been reported - 1,926,906,626 cycles # 2.811 GHz - 2,842,186,724 instructions # 1.47 insn per cycle - 0.743931745 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common + 1,968,514,245 cycles # 2.841 GHz + 2,902,581,432 instructions # 1.47 insn per cycle + 0.750206216 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 
1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.619196e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.692028e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.695553e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.620229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.693332e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.696842e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.807131 sec +TOTAL : 1.810763 sec INFO: No Floating Point Exceptions have been reported - 5,799,524,002 cycles # 2.848 GHz - 12,052,890,185 instructions # 2.08 insn per cycle - 2.092585249 seconds time elapsed + 5,856,364,996 cycles # 2.870 GHz + 12,360,478,892 instructions # 2.11 insn per cycle + 2.100167053 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262659968156085E-004 Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.909596e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.910513e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910513e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.921942e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.922884e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.922884e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.597641 sec +TOTAL : 8.542686 sec INFO: No Floating Point Exceptions have been reported - 24,643,205,050 cycles # 2.865 GHz - 78,127,351,887 instructions # 3.17 insn per cycle - 8.601700557 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 24,622,493,732 cycles # 2.881 GHz + 78,127,963,456 instructions # 3.17 insn per cycle + 8.546707601 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.967025e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.979707e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.979707e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.925135e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.937766e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
6.937766e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.362883 sec +TOTAL : 2.378023 sec INFO: No Floating Point Exceptions have been reported - 6,478,067,487 cycles # 2.738 GHz - 20,118,736,323 instructions # 3.11 insn per cycle - 2.366901273 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) + 6,481,719,151 cycles # 2.722 GHz + 20,120,720,773 instructions # 3.10 insn per cycle + 2.382079719 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.562128e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.568498e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.568498e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.581338e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.587855e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.587855e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 1.059093 sec +TOTAL : 1.046370 sec INFO: No Floating Point Exceptions have been reported - 2,820,901,565 cycles # 2.655 GHz - 6,985,719,621 instructions # 2.48 insn per cycle - 1.063184068 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) + 2,822,358,408 cycles # 2.688 GHz + 6,985,542,199 instructions # 2.48 insn per cycle + 1.050425346 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.768476e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.776638e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.776638e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.806198e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.814674e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.814674e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.937260 sec +TOTAL : 0.917862 sec INFO: No Floating Point Exceptions have been reported - 2,496,445,075 cycles # 2.654 GHz - 6,293,783,461 instructions # 2.52 insn per cycle - 0.941399878 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) + 2,496,110,223 cycles # 2.709 GHz + 6,293,657,033 instructions # 2.52 insn per cycle + 0.921934399 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.360121e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.365073e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.365073e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.393764e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.398765e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.398765e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.215676 sec +TOTAL : 1.186576 sec INFO: No Floating Point Exceptions have been reported - 2,051,427,144 cycles # 1.683 GHz - 3,264,452,978 instructions # 1.59 insn per cycle - 1.219734902 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) + 2,050,577,153 cycles # 1.723 GHz + 3,264,219,053 instructions # 1.59 insn per cycle + 1.190613213 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 76da60c048..97148e3ba7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:28:14 +DATE: 2024-05-16_15:10:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.328799e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.380015e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.385833e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.326390e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.378340e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.384052e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.466913 sec +TOTAL : 0.468432 sec INFO: No Floating Point Exceptions have been reported - 1,916,716,842 cycles # 2.819 GHz - 2,909,024,684 instructions # 1.52 insn per cycle - 0.736711953 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst + 1,956,153,885 cycles # 2.820 GHz + 2,925,124,547 instructions # 1.50 insn per cycle + 0.750741002 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 
1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.577095e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.650934e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.654290e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.616852e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.690868e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.694290e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.755954 sec +TOTAL : 1.758835 sec INFO: No Floating Point Exceptions have been reported - 5,660,148,395 cycles # 2.847 GHz - 12,178,684,287 instructions # 2.15 insn per cycle - 2.045588387 seconds time elapsed + 5,694,632,258 cycles # 2.846 GHz + 12,170,382,669 instructions # 2.14 insn per cycle + 2.057387110 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262659968156085E-004 Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.908931e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.909878e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.909878e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.910170e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.911121e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.911121e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.599612 sec +TOTAL : 8.594641 sec INFO: No Floating Point Exceptions have been reported - 24,627,865,782 cycles # 2.863 GHz - 78,128,090,050 instructions # 3.17 insn per cycle - 8.603669298 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 24,610,525,016 cycles # 2.863 GHz + 78,132,278,540 instructions # 3.17 insn per cycle + 8.598723021 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.850796e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.863212e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.863212e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.888566e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.900956e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
6.900956e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.402591 sec +TOTAL : 2.388862 sec INFO: No Floating Point Exceptions have been reported - 6,479,231,608 cycles # 2.695 GHz - 20,122,059,228 instructions # 3.11 insn per cycle - 2.406715396 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) + 6,476,954,136 cycles # 2.708 GHz + 20,121,920,046 instructions # 3.11 insn per cycle + 2.393015096 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.562566e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.568871e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.568871e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.562187e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.568435e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.568435e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.057934 sec +TOTAL : 1.057977 sec INFO: No Floating Point Exceptions have been reported - 2,818,265,341 cycles # 2.655 GHz - 6,988,053,760 instructions # 2.48 insn per cycle - 1.062033801 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) + 2,818,730,747 cycles # 2.656 GHz + 6,988,428,853 instructions # 2.48 insn per cycle + 1.062013241 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.768732e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.776855e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.776855e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.767100e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.775059e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.775059e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.935438 sec +TOTAL : 0.936311 sec INFO: No Floating Point Exceptions have been reported - 2,490,297,733 cycles # 2.652 GHz - 6,295,843,677 instructions # 2.53 insn per cycle - 0.939633973 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) + 2,487,367,369 cycles # 2.647 GHz + 6,295,352,067 instructions # 2.53 insn per cycle + 0.940344403 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.362182e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.367071e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.367071e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.354758e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.359575e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.359575e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.212324 sec +TOTAL : 1.218960 sec INFO: No Floating Point Exceptions have been reported - 2,046,105,310 cycles # 1.683 GHz - 3,266,428,243 instructions # 1.60 insn per cycle - 1.216361843 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) + 2,057,435,423 cycles # 1.683 GHz + 3,266,628,935 instructions # 1.59 insn per cycle + 1.223151915 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 84a1accbff..dc12ca7aae 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -40,193 +40,193 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:25:31 +DATE: 2024-05-16_15:07:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.733155e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.384139e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.389936e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.747793e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.405382e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.411341e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.471997 sec +TOTAL : 0.473580 sec INFO: No Floating Point Exceptions have been reported - 1,949,349,946 cycles # 2.815 GHz - 2,883,916,104 instructions # 1.48 insn per cycle - 0.749239635 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst + 1,929,031,590 cycles # 2.811 GHz + 2,902,080,173 instructions # 1.50 insn per cycle + 0.744461149 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.466972e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.686495e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.689920e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.464876e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.690964e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.694375e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.837386 sec +TOTAL : 1.841417 sec INFO: No Floating Point Exceptions have been reported - 5,922,319,867 cycles # 2.847 GHz - 12,727,018,620 instructions # 2.15 insn per cycle - 2.135925623 seconds time elapsed + 5,892,322,421 cycles # 2.846 GHz + 12,206,550,799 instructions # 2.07 insn per cycle + 2.128532659 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262659968156085E-004 Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.908017e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.908934e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.908934e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.911509e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.912427e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.912427e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.603676 sec +TOTAL : 8.588000 sec INFO: No Floating Point Exceptions have been reported - 24,642,724,373 cycles # 2.863 GHz - 78,128,946,887 instructions # 3.17 insn per cycle - 8.607823851 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 24,603,486,303 cycles # 2.864 GHz + 78,128,844,221 instructions # 3.18 insn per cycle + 8.592028071 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.881721e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.894028e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.894028e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.897521e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.909886e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
6.909886e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.391103 sec +TOTAL : 2.385730 sec INFO: No Floating Point Exceptions have been reported - 6,467,875,915 cycles # 2.701 GHz - 20,120,565,733 instructions # 3.11 insn per cycle - 2.395185690 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) + 6,477,077,766 cycles # 2.711 GHz + 20,121,628,941 instructions # 3.11 insn per cycle + 2.389950461 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.557471e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.563801e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.563801e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.564279e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.570570e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.570570e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.061218 sec +TOTAL : 1.056604 sec INFO: No Floating Point Exceptions have been reported - 2,818,223,714 cycles # 2.647 GHz - 6,988,155,543 instructions # 2.48 insn per cycle - 1.065311424 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) + 2,817,814,854 cycles # 2.658 GHz + 6,988,003,654 instructions # 2.48 insn per cycle + 1.060745031 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.764482e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.772453e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.772453e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.769248e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.777272e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.777272e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.937612 sec +TOTAL : 0.935150 sec INFO: No Floating Point Exceptions have been reported - 2,491,490,656 cycles # 2.647 GHz - 6,295,363,549 instructions # 2.53 insn per cycle - 0.941754882 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) + 2,489,664,656 cycles # 2.652 GHz + 6,295,373,565 instructions # 2.53 insn per cycle + 0.939255376 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.361210e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.365914e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.365914e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.359069e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.363945e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.363945e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.213135 sec +TOTAL : 1.214973 sec INFO: No Floating Point Exceptions have been reported - 2,046,512,114 cycles # 1.682 GHz - 3,265,943,047 instructions # 1.60 insn per cycle - 1.217215733 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) + 2,049,104,437 cycles # 1.682 GHz + 3,266,431,248 instructions # 1.59 insn per cycle + 1.219018056 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index e9cd0fab8d..fb9b3d5f50 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_08:59:11 +DATE: 2024-05-16_14:41:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.366106e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.412879e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.418387e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.355693e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.401036e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.406994e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.484833 sec +TOTAL : 0.484635 sec INFO: No Floating Point Exceptions have been reported - 2,009,324,153 cycles # 2.858 GHz - 2,980,982,469 instructions # 1.48 insn per cycle - 0.761343116 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 + 1,972,569,534 cycles # 2.816 GHz + 2,939,499,932 instructions # 1.49 insn per cycle + 0.757402101 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.612323e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.672623e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.675391e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.619113e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.679355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.682149e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.716642 sec +TOTAL : 1.713739 sec INFO: No Floating Point Exceptions have been reported - 5,662,740,437 cycles # 2.893 GHz - 11,353,972,089 instructions # 2.01 insn per cycle - 2.014594393 seconds time elapsed + 5,540,767,327 cycles # 2.848 GHz + 11,699,037,597 instructions # 2.11 insn per cycle + 2.001424634 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262659968156085E-004 Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.952072e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.953022e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.953022e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.914726e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.915647e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.915647e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.410163 sec +TOTAL : 8.573445 sec INFO: No Floating Point Exceptions have been reported - 24,530,090,353 cycles # 2.916 GHz - 77,854,592,626 instructions # 3.17 insn per cycle - 8.414276248 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3114) (avx2: 0) (512y: 0) (512z: 0) + 24,558,709,341 cycles # 2.863 GHz + 77,854,833,330 instructions # 3.17 insn per cycle + 8.577561930 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3114) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627487e-04 Avg ME (F77/C++) = 6.6274866268634797E-004 Relative difference = 5.630135835748959e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.109913e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.123055e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.123055e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.975982e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.989277e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.989277e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.314562 sec +TOTAL : 2.359034 sec INFO: No Floating Point Exceptions have been reported - 6,419,654,507 cycles # 2.769 GHz - 20,085,518,289 instructions # 3.13 insn per cycle - 2.318756911 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) + 6,427,044,365 cycles # 2.721 GHz + 20,086,102,386 instructions # 3.13 insn per cycle + 2.363343503 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861465384638E-004 Relative difference = 2.211071647257023e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.526248e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.532430e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.532430e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.504468e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.510257e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.510257e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.082391 sec +TOTAL : 1.097904 sec INFO: No Floating Point Exceptions have been reported - 2,916,919,637 cycles # 2.686 GHz - 7,129,894,032 instructions # 2.44 insn per cycle - 1.086400028 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) + 2,922,790,348 cycles # 2.654 GHz + 7,129,934,034 instructions # 2.44 insn per cycle + 1.101954791 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668077068E-004 Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.724869e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.732671e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.732671e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.699271e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.706623e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.706623e+04 ) sec^-1 
MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.958871 sec +TOTAL : 0.973114 sec INFO: No Floating Point Exceptions have been reported - 2,599,161,158 cycles # 2.701 GHz - 6,439,212,709 instructions # 2.48 insn per cycle - 0.962903376 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) + 2,595,556,237 cycles # 2.658 GHz + 6,438,662,691 instructions # 2.48 insn per cycle + 0.977341866 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668077068E-004 Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.354018e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.358675e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.358675e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.316379e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.321013e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.321013e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.219384 sec +TOTAL : 1.254012 sec INFO: No Floating Point Exceptions have been reported - 2,118,745,707 cycles # 1.733 GHz - 3,427,593,513 instructions # 1.62 insn per cycle - 1.223495203 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2924) (512y: 22) (512z: 9654) + 2,116,081,195 cycles # 1.683 GHz + 3,427,806,501 instructions # 1.62 insn per cycle + 1.258282002 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2924) (512y: 22) (512z: 9654) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952032322112E-004 Relative difference = 3.066639970473621e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 4458ee928f..3f8f67a608 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:13:29 +DATE: 2024-05-16_14:55:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.547623e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.587705e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.592904e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.546626e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.586411e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.590653e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.494801 sec +TOTAL : 0.493388 sec INFO: No Floating Point Exceptions have been reported - 2,071,828,549 cycles # 2.818 GHz - 3,075,474,522 instructions # 1.48 insn per cycle - 0.793077205 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/gcheck.exe -p 64 256 1 + 2,052,942,224 cycles # 2.851 GHz + 3,071,897,705 instructions # 1.50 insn per cycle + 0.778091403 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.752855e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.811492e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.814175e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.711232e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.769137e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.771784e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.863794 sec +TOTAL : 1.862053 sec INFO: No Floating Point Exceptions have been reported - 5,990,387,426 cycles # 2.848 GHz - 12,901,532,368 instructions # 2.15 insn per cycle - 2.159413441 seconds time elapsed + 6,039,204,312 cycles # 2.872 GHz + 11,937,016,347 instructions # 1.98 insn per cycle + 2.158356809 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262660579844562E-004 Relative difference = 2.836238137986709e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.440520e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.441267e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.441267e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.454006e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.454774e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.454774e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 30.153753 sec +TOTAL : 30.077717 sec INFO: No Floating Point Exceptions have been reported - 86,241,694,269 cycles # 2.860 GHz - 135,579,841,827 instructions # 1.57 insn per cycle - 30.157843376 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15593) (avx2: 0) (512y: 0) (512z: 0) + 86,228,096,895 cycles # 2.867 GHz + 135,581,749,205 instructions # 1.57 insn per cycle + 30.081848617 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15593) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 Avg ME (F77/C++) = 6.6275351196781740E-004 Relative difference = 1.805772034719401e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.670546e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.682391e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.682391e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.767198e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.779100e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.779100e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.467623 sec +TOTAL : 2.432356 sec INFO: No Floating Point Exceptions have been reported - 6,770,215,775 cycles # 2.740 GHz - 19,386,112,198 instructions # 2.86 insn per cycle - 2.471854770 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:69681) (avx2: 0) (512y: 0) (512z: 0) + 6,776,462,064 cycles # 2.783 GHz + 19,386,992,522 instructions # 2.86 insn per cycle + 2.436630257 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69681) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274862707273868E-004 Relative difference = 4.0849182767952624e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.382738e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.387638e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.387638e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.415254e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.420302e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.420302e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.194394 sec +TOTAL : 1.167177 sec INFO: No Floating Point Exceptions have been reported - 3,181,944,591 cycles # 2.656 GHz - 6,807,632,796 instructions # 2.14 insn per cycle - 1.198610229 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) + 3,174,327,264 cycles # 2.711 GHz + 6,807,988,001 instructions # 2.14 insn per cycle + 1.171487938 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731558747466E-004 Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.675256e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.682394e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.682394e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.702865e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.710950e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.710950e+04 ) sec^-1 
MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.987158 sec +TOTAL : 0.971352 sec INFO: No Floating Point Exceptions have been reported - 2,637,251,724 cycles # 2.662 GHz - 5,986,082,953 instructions # 2.27 insn per cycle - 0.991313558 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) + 2,641,359,018 cycles # 2.709 GHz + 5,985,956,533 instructions # 2.27 insn per cycle + 0.975633569 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731558747466E-004 Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.337403e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.342008e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.342008e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.382138e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.387143e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.387143e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.234823 sec +TOTAL : 1.195051 sec INFO: No Floating Point Exceptions have been reported - 2,079,754,188 cycles # 1.680 GHz - 3,501,095,607 instructions # 1.68 insn per cycle - 1.239081052 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5210) (512y: 3) (512z:44829) + 2,079,765,601 cycles # 1.735 GHz + 3,501,460,071 instructions # 1.68 insn per cycle + 1.199295448 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5210) (512y: 3) (512z:44829) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627275e-04 Avg ME (F77/C++) = 6.6272750363879224E-004 Relative difference = 5.490631193034436e-09 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index fb291f957e..f651d28060 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:14:18 +DATE: 2024-05-16_14:56:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.501123e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.540237e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.545384e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.511551e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.549792e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.554590e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.496418 sec +TOTAL : 0.495451 sec INFO: No Floating Point Exceptions have been reported - 2,075,811,223 cycles # 2.818 GHz - 3,117,901,600 instructions # 1.50 insn per cycle - 0.794898428 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/gcheck.exe -p 64 256 1 + 2,081,587,427 cycles # 2.833 GHz + 3,058,350,902 instructions # 1.47 insn per cycle + 0.791962629 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.655063e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.712805e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.715432e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.609535e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.664835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.667390e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.882952 sec +TOTAL : 1.882393 sec INFO: No Floating Point Exceptions have been reported - 6,029,571,719 cycles # 2.845 GHz - 13,064,379,561 instructions # 2.17 insn per cycle - 2.178422753 seconds time elapsed + 6,108,387,546 cycles # 2.880 GHz + 12,340,826,531 instructions # 2.02 insn per cycle + 2.177538628 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262660579844562E-004 Relative difference = 2.836238137986709e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.411371e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.412111e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.412111e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.501753e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.502516e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.502516e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 30.314976 sec +TOTAL : 29.816879 sec INFO: No Floating Point Exceptions have been reported - 86,626,378,196 cycles # 2.858 GHz - 135,996,101,788 instructions # 1.57 insn per cycle - 30.319079477 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15571) (avx2: 0) (512y: 0) (512z: 0) + 86,332,810,324 cycles # 2.895 GHz + 136,005,056,328 instructions # 1.58 insn per cycle + 29.820915946 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 Avg ME (F77/C++) = 6.6275348988418387E-004 Relative difference = 1.5263316105958472e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.540707e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.551820e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.551820e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.665500e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.677672e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.677672e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.515575 sec +TOTAL : 2.468426 sec INFO: No Floating Point Exceptions have been reported - 6,857,543,798 cycles # 2.723 GHz - 19,438,697,676 instructions # 2.83 insn per cycle - 2.519792052 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:69723) (avx2: 0) (512y: 0) (512z: 0) + 6,845,942,231 cycles # 2.769 GHz + 19,438,050,467 instructions # 2.84 insn per cycle + 2.472841886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69723) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274862764021530E-004 Relative difference = 4.170542995014107e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.404571e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.409616e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.409616e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.417376e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.422501e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.422501e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.175922 sec +TOTAL : 1.165327 sec INFO: No Floating Point Exceptions have been reported - 3,127,737,539 cycles # 2.651 GHz - 6,718,846,853 instructions # 2.15 insn per cycle - 1.180394712 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) + 3,124,352,057 cycles # 2.673 GHz + 6,718,803,660 instructions # 2.15 insn per cycle + 1.169556736 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731651051409E-004 Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.670600e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.677947e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.677947e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.691458e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.698947e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.698947e+04 ) sec^-1 
MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.989790 sec +TOTAL : 0.977716 sec INFO: No Floating Point Exceptions have been reported - 2,639,877,826 cycles # 2.658 GHz - 5,969,675,763 instructions # 2.26 insn per cycle - 0.994097973 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) + 2,637,169,918 cycles # 2.688 GHz + 5,969,286,098 instructions # 2.26 insn per cycle + 0.981942660 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731651051409E-004 Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.337480e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.342044e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.342044e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.359514e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.364207e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.364207e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.234650 sec +TOTAL : 1.214640 sec INFO: No Floating Point Exceptions have been reported - 2,078,705,261 cycles # 1.679 GHz - 3,494,560,286 instructions # 1.68 insn per cycle - 1.238808756 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4174) (512y: 4) (512z:44472) + 2,077,190,375 cycles # 1.705 GHz + 3,494,266,618 instructions # 1.68 insn per cycle + 1.219012886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4174) (512y: 4) (512z:44472) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627275e-04 Avg ME (F77/C++) = 6.6272750384530066E-004 Relative difference = 5.80223501432476e-09 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 871f3f0674..19b36f52e3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_08:59:36 +DATE: 2024-05-16_14:41:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.448055e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.471732e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.473882e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.461685e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.485212e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.487571e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.528517 sec +TOTAL : 0.529512 sec INFO: No Floating Point Exceptions have been reported - 2,206,186,016 cycles # 2.854 GHz - 3,406,499,343 instructions # 1.54 insn per cycle - 0.832615871 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 + 2,178,837,447 cycles # 2.822 GHz + 3,364,663,947 instructions # 1.54 insn per cycle + 0.830788537 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.121788e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.148994e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.150173e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.126264e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.153475e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154649e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.032259 sec +TOTAL : 3.029127 sec INFO: No Floating Point Exceptions have been reported - 9,483,811,613 cycles # 2.887 GHz - 21,728,837,964 instructions # 2.29 insn per cycle - 3.343241654 seconds time elapsed + 9,370,132,482 cycles # 2.854 GHz + 19,961,685,193 instructions # 2.13 insn per cycle + 3.339329204 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.842230e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.843084e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.843084e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.814801e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.815638e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.815638e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.911947 sec +TOTAL : 9.046315 sec INFO: No Floating Point Exceptions have been reported - 25,916,297,719 cycles # 2.907 GHz - 79,437,751,444 instructions # 3.07 insn per cycle - 8.916043515 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4858) (avx2: 0) (512y: 0) (512z: 0) + 25,898,843,827 cycles # 2.862 GHz + 79,438,691,532 instructions # 3.07 insn per cycle + 9.050548799 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4858) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.498308e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.501464e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.501464e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.427576e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.430583e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.430583e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.697616 sec +TOTAL : 4.794109 sec INFO: No Floating Point Exceptions have been reported - 12,720,201,550 cycles # 2.706 GHz - 38,549,576,662 instructions # 3.03 insn per cycle - 4.701883730 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13163) (avx2: 0) (512y: 0) (512z: 0) + 12,707,110,349 cycles # 2.649 GHz + 38,549,995,901 instructions # 3.03 insn per cycle + 4.798432518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13163) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.987897e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.003874e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.003874e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.947160e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.962973e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.962973e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.063104 sec +TOTAL : 2.073357 sec INFO: No Floating Point Exceptions have been reported - 5,528,470,581 cycles # 2.677 GHz - 13,482,584,347 instructions # 2.44 insn per cycle - 2.067345486 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) + 5,517,673,615 cycles # 2.658 GHz + 13,479,814,632 instructions # 2.44 insn per cycle + 2.077628129 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.262373e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.284301e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.284301e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.092853e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.113453e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.113453e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.780175 sec +TOTAL : 1.813039 sec INFO: No Floating Point Exceptions have been reported - 4,830,159,700 cycles # 2.708 GHz - 12,135,848,860 instructions # 2.51 insn per cycle - 1.784339719 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) + 4,828,852,439 cycles # 2.658 GHz + 12,135,084,334 instructions # 2.51 insn per cycle + 1.817332368 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.855143e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.867012e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.867012e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.671146e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.682190e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.682190e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.402299 sec +TOTAL : 2.468744 sec INFO: No Floating Point Exceptions have been reported - 4,146,104,419 cycles # 1.724 GHz - 6,336,145,765 instructions # 1.53 insn per cycle - 2.406581725 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1803) (512y: 93) (512z: 9358) + 4,141,507,976 cycles # 1.676 GHz + 6,337,241,929 instructions # 1.53 insn per cycle + 2.472886901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1803) (512y: 93) (512z: 9358) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index b1fc786479..10c707e81e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-15_09:00:09 +DATE: 2024-05-16_14:42:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.479154e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.502080e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.504549e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.482619e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.506518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508744e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.526739 sec +TOTAL : 0.530792 sec INFO: No Floating Point Exceptions have been reported - 2,200,293,747 cycles # 2.851 GHz - 3,346,973,298 instructions # 1.52 insn per cycle - 0.832044919 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 + 2,175,803,522 cycles # 2.817 GHz + 3,378,965,043 instructions # 1.55 insn per cycle + 0.832396723 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.140062e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.167552e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.168734e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.148343e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.175835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.177034e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.020713 sec +TOTAL : 3.021680 sec INFO: No Floating Point Exceptions have been reported - 9,435,366,326 cycles # 2.881 GHz - 21,605,435,243 instructions # 2.29 insn per cycle - 3.330954832 seconds time elapsed + 9,373,127,330 cycles # 2.855 GHz + 21,008,547,067 instructions # 2.24 insn per cycle + 3.339100414 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.833316e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.834177e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.834177e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.816314e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.817146e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.817146e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.956408 sec +TOTAL : 9.038908 sec INFO: No Floating Point Exceptions have been reported - 25,877,692,564 cycles # 2.889 GHz - 79,450,699,359 instructions # 3.07 insn per cycle - 8.960811501 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4505) (avx2: 0) (512y: 0) (512z: 0) + 25,883,565,851 cycles # 2.863 GHz + 79,454,182,113 instructions # 3.07 insn per cycle + 9.043041112 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4505) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.503582e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.506712e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.506712e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.444359e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.447378e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.447378e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.690551 sec +TOTAL : 4.770841 sec INFO: No Floating Point Exceptions have been reported - 12,673,056,790 cycles # 2.700 GHz - 38,520,866,041 instructions # 3.04 insn per cycle - 4.694808939 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12930) (avx2: 0) (512y: 0) (512z: 0) + 12,673,930,469 cycles # 2.656 GHz + 38,521,208,960 instructions # 3.04 insn per cycle + 4.775036357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12930) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.015244e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.031118e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.031118e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.869925e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.885399e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.885399e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.055513 sec +TOTAL : 2.093382 sec INFO: No Floating Point Exceptions have been reported - 5,573,540,691 cycles # 2.707 GHz - 13,605,499,057 instructions # 2.44 insn per cycle - 2.059674853 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) + 5,571,627,209 cycles # 2.657 GHz + 13,607,217,607 instructions # 2.44 insn per cycle + 2.097652206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.105038e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.125763e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.125763e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.920636e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.941759e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.941759e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.810578 sec +TOTAL : 1.847928 sec INFO: No Floating Point Exceptions have been reported - 4,912,374,631 cycles # 2.708 GHz - 12,272,252,618 instructions # 2.50 insn per cycle - 1.814800349 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) + 4,911,801,030 cycles # 2.653 GHz + 12,271,296,407 instructions # 2.50 insn per cycle + 1.852091714 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check.exe -p 64 256 1 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.831320e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.842743e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.842743e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.658898e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.670376e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.670376e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.410208 sec +TOTAL : 2.472418 sec INFO: No Floating Point Exceptions have been reported - 4,149,223,707 cycles # 1.719 GHz - 6,443,281,470 instructions # 1.55 insn per cycle - 2.414332469 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1628) (512y: 191) (512z: 9356) + 4,148,038,447 cycles # 1.675 GHz + 6,442,551,576 instructions # 1.55 insn per cycle + 2.476725391 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1628) (512y: 191) (512z: 9356) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 5b64325983..5b5bd116a3 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_09:02:00 +DATE: 2024-05-16_14:44:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.068667e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.069058e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.069231e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.065678e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.066059e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.066269e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.432023 sec +TOTAL : 2.433623 sec INFO: No Floating Point Exceptions have been reported - 7,962,061,333 cycles # 2.891 GHz - 17,842,918,220 instructions # 2.24 insn per cycle - 2.812445119 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 1 + 7,848,341,490 cycles # 2.847 GHz + 17,462,165,188 instructions # 2.22 insn per cycle + 2.813787845 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.282838e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.284842e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.285132e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.279047e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.280919e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.281197e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.997752 sec +TOTAL : 3.985795 sec INFO: No Floating Point Exceptions have been reported - 12,544,904,873 cycles # 2.901 GHz - 27,763,638,781 instructions # 2.21 insn per cycle - 4.380155266 seconds time elapsed + 12,317,841,072 cycles # 2.855 GHz + 29,065,647,551 instructions # 2.36 insn per cycle + 4.369250222 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722595284406640E-003 Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.874055e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.874298e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.874298e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.769533e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.769744e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.769744e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.708004 sec +TOTAL : 6.808252 sec INFO: No Floating Point Exceptions have been reported - 18,789,307,142 cycles # 2.800 GHz - 53,916,081,627 instructions # 2.87 insn per cycle - 6.711948866 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) + 18,798,528,369 cycles # 2.760 GHz + 53,916,630,138 instructions # 2.87 insn per cycle + 6.812355714 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.563882e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.563969e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.563969e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.539980e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.540062e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.540062e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.380877 sec +TOTAL : 3.433152 sec INFO: No Floating Point Exceptions have been reported - 9,819,609,469 cycles # 2.902 GHz - 27,092,420,480 instructions # 2.76 insn per cycle - 3.384888858 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) + 9,799,231,624 cycles # 2.852 GHz + 27,092,581,938 instructions # 2.76 insn per cycle + 3.437235180 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.396404e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.396834e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.396834e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.326889e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.327284e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.327284e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.559910 sec +TOTAL : 1.592072 sec INFO: No Floating Point Exceptions have been reported - 4,214,781,215 cycles # 2.696 GHz - 9,560,689,751 instructions # 2.27 insn per cycle - 1.563893957 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) + 4,220,179,984 cycles # 2.645 GHz + 9,560,887,701 instructions # 2.27 insn per cycle + 1.596045022 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.840945e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.841459e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.841459e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.770010e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.770613e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.770613e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.379348 sec +TOTAL : 1.405649 sec INFO: No Floating Point Exceptions have been reported - 3,753,155,368 cycles # 2.714 GHz - 8,484,753,328 instructions # 2.26 insn per cycle - 1.383331847 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) + 3,726,923,548 cycles # 2.645 GHz + 8,484,897,516 instructions # 2.28 insn per cycle + 1.409716339 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.355106e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.355613e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.355613e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.281739e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.282258e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.282258e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.579328 sec +TOTAL : 1.614138 sec INFO: No Floating Point Exceptions have been reported - 2,715,728,873 cycles # 1.716 GHz - 4,273,895,295 instructions # 1.57 insn per cycle - 1.583286672 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) + 2,690,163,143 cycles # 1.663 GHz + 4,272,866,756 instructions # 1.59 insn per cycle + 1.618172762 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 0da0f4b152..1be1cfeedf 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_09:21:39 +DATE: 2024-05-16_15:03:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.066985e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.067950e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.067950e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.063403e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.064386e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.064386e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.381273 sec +TOTAL : 2.395260 sec INFO: No Floating Point Exceptions have been reported - 7,706,027,501 cycles # 2.851 GHz - 16,286,663,405 instructions # 2.11 insn per cycle - 2.759816734 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge + 7,755,019,436 cycles # 2.852 GHz + 17,230,726,903 instructions # 2.22 insn per cycle + 2.775067655 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) @@ -70,7 +70,7 @@ WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -79,29 +79,29 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.231814e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.267464e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.267464e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.263412e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.297943e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.297943e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.986121 sec +TOTAL : 3.971807 sec INFO: No Floating Point Exceptions have been reported - 12,303,392,195 cycles # 2.856 GHz - 29,197,517,966 instructions # 2.37 insn per cycle - 4.366638096 seconds time elapsed + 12,283,103,403 cycles # 2.855 GHz + 27,758,308,143 instructions # 2.26 insn per cycle + 4.357937638 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722595284406640E-003 Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -109,28 +109,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.356883e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.357084e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.357084e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.361041e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.361236e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.361236e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.181391 sec +TOTAL : 7.177191 sec INFO: No Floating Point Exceptions have been reported - 18,963,162,368 cycles # 2.640 GHz - 53,918,714,635 instructions # 2.84 insn per cycle - 7.185419358 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) + 18,941,402,140 cycles # 2.638 GHz + 53,918,413,850 instructions # 2.85 insn per cycle + 7.181202320 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -138,28 +138,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.532810e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.532897e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.532897e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.538220e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.538308e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.538308e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.449678 sec +TOTAL : 3.437404 sec INFO: No Floating Point Exceptions have been reported - 9,860,760,253 cycles # 2.856 GHz - 27,094,035,576 instructions # 2.75 insn per cycle - 3.453719819 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) + 9,826,263,323 cycles # 2.856 GHz + 27,093,421,705 instructions # 2.76 insn per cycle + 3.441586183 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -167,28 +167,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.310317e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.310722e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.310722e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.320877e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.321321e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.321321e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.600893 sec +TOTAL : 1.595267 sec INFO: No Floating Point Exceptions have been reported - 4,227,936,122 cycles # 2.635 GHz - 9,561,772,642 instructions # 2.26 insn per cycle - 1.604964741 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) + 4,226,325,559 cycles # 2.644 GHz + 9,562,000,988 instructions # 2.26 insn per cycle + 1.599357751 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -196,28 +196,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.779364e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.779891e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.779891e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.768698e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.769266e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.769266e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.402471 sec +TOTAL : 1.406299 sec INFO: No Floating Point Exceptions have been reported - 3,727,801,740 cycles # 2.652 GHz - 8,485,976,731 instructions # 2.28 insn per cycle - 1.406514546 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) + 3,728,202,948 cycles # 2.645 GHz + 8,485,828,873 instructions # 2.28 insn per cycle + 1.410433353 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -225,22 +225,22 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.270173e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.270653e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.270653e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.279021e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.279511e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.279511e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.620234 sec +TOTAL : 1.615850 sec INFO: No Floating Point Exceptions have been reported - 2,696,976,380 cycles # 1.661 GHz - 4,275,004,138 instructions # 1.59 insn per cycle - 1.624259831 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) + 2,693,497,833 cycles # 1.663 GHz + 4,273,840,765 instructions # 1.59 insn per cycle + 1.620067219 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index bbe92dc32c..3f519fda03 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_09:02:54 +DATE: 2024-05-16_14:45:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.057773e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.058162e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.058394e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.065431e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.065819e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.066015e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.432670 sec +TOTAL : 2.431749 sec INFO: No Floating Point Exceptions have been reported - 7,976,180,051 cycles # 2.894 GHz - 18,205,979,560 instructions # 2.28 insn per cycle - 2.812149746 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/gcheck.exe -p 1 256 1 + 7,858,443,167 cycles # 2.852 GHz + 17,797,449,482 instructions # 2.26 insn per cycle + 2.810886675 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.207912e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.209784e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.210050e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.189050e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.190883e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.191132e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.010061 sec +TOTAL : 4.007219 sec INFO: No Floating Point Exceptions have been reported - 12,545,103,701 cycles # 2.894 GHz - 29,508,105,022 instructions # 2.35 insn per cycle - 4.393055123 seconds time elapsed + 12,388,147,716 cycles # 2.856 GHz + 29,572,084,158 instructions # 2.39 insn per cycle + 4.393669645 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722595284406640E-003 Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.584447e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.584650e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.584650e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.555801e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.556015e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.556015e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.965610 sec +TOTAL : 6.995892 sec INFO: No Floating Point Exceptions have been reported - 18,813,969,937 cycles # 2.700 GHz - 53,926,042,619 instructions # 2.87 insn per cycle - 6.969544126 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32063) (avx2: 0) (512y: 0) (512z: 0) + 18,868,341,179 cycles # 2.696 GHz + 53,930,114,085 instructions # 2.86 insn per cycle + 6.999840535 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32063) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.576698e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.576791e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.576791e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.547899e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.547983e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.547983e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.353494 sec +TOTAL : 3.415669 sec INFO: No Floating Point Exceptions have been reported - 9,734,561,067 cycles # 2.900 GHz - 27,089,258,573 instructions # 2.78 insn per cycle - 3.357505834 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96286) (avx2: 0) (512y: 0) (512z: 0) + 9,762,163,827 cycles # 2.856 GHz + 27,089,755,364 instructions # 2.77 insn per cycle + 3.419663266 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96286) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.404360e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.404774e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.404774e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.328520e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.328923e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328923e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.555973 sec +TOTAL : 1.591769 sec INFO: No Floating Point Exceptions have been reported - 4,218,180,841 cycles # 2.705 GHz - 9,561,432,896 instructions # 2.27 insn per cycle - 1.560032274 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) + 4,217,350,816 cycles # 2.647 GHz + 9,560,856,496 instructions # 2.27 insn per cycle + 1.595749154 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.834965e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.835499e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.835499e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.765235e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765802e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.765802e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.381649 sec +TOTAL : 1.407302 sec INFO: No Floating Point Exceptions have been reported - 3,737,866,183 cycles # 2.699 GHz - 8,484,726,102 instructions # 2.27 insn per cycle - 1.385644376 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) + 3,737,969,275 cycles # 2.650 GHz + 8,484,674,655 instructions # 2.27 insn per cycle + 1.411406372 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.375552e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.376120e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.376120e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.273845e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.274322e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.274322e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.569284 sec +TOTAL : 1.617639 sec INFO: No Floating Point Exceptions have been reported - 2,699,189,606 cycles # 1.717 GHz - 4,275,945,743 instructions # 1.58 insn per cycle - 1.573305719 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) + 2,695,774,477 cycles # 1.663 GHz + 4,276,120,388 instructions # 1.59 insn per cycle + 1.621698890 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index d45d414f5f..8097294660 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_09:03:47 +DATE: 2024-05-16_14:45:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.561692e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.562488e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.562848e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.560287e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.561087e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.561509e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.685180 sec +TOTAL : 1.688313 sec INFO: No Floating Point Exceptions have been reported - 5,673,036,605 cycles # 2.891 GHz - 10,563,415,638 instructions # 1.86 insn per cycle - 2.019219660 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 1 + 5,610,919,333 cycles # 2.843 GHz + 12,076,970,192 instructions # 2.15 insn per cycle + 2.032164963 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.299284e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.299934e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.300068e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.335524e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.336187e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.336332e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.926355 sec +TOTAL : 1.920912 sec INFO: No Floating Point Exceptions have been reported - 6,389,248,766 cycles # 2.887 GHz - 13,556,303,615 instructions # 2.12 insn per cycle - 2.270243999 seconds time elapsed + 6,262,064,127 cycles # 2.846 GHz + 13,866,454,713 instructions # 2.21 insn per cycle + 2.256561773 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.849636e-03 Avg ME (F77/GPU) = 9.8712405367667715E-003 Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.586714e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.586972e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.586972e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.473644e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.473896e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.473896e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.153257 sec +TOTAL : 6.236471 sec INFO: No Floating Point Exceptions have been reported - 17,830,313,772 cycles # 2.896 GHz - 53,588,816,239 instructions # 3.01 insn per cycle - 6.157108561 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) + 17,834,532,335 cycles # 2.858 GHz + 53,589,179,622 instructions # 3.00 insn per cycle + 6.240522901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 Avg ME (F77/C++) = 9.8479612087541066E-003 Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.368933e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.369342e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.369342e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.311314e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.311704e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.311704e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.572219 sec +TOTAL : 1.599839 sec INFO: No Floating Point Exceptions have been reported - 4,576,967,383 cycles # 2.905 GHz - 13,761,803,462 instructions # 3.01 insn per cycle - 1.576296416 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) + 4,578,829,094 cycles # 2.856 GHz + 13,761,810,246 instructions # 3.01 insn per cycle + 1.603811766 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896527003E-003 Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.773238e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.774866e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.774866e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.636666e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.638274e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.638274e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.785027 sec +TOTAL : 0.800488 sec INFO: No Floating Point Exceptions have been reported - 2,126,166,499 cycles # 2.697 GHz - 4,816,144,945 instructions # 2.27 insn per cycle - 0.789120264 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) + 2,129,570,848 cycles # 2.649 GHz + 4,816,093,977 instructions # 2.26 insn per cycle + 0.804523713 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.733735e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.735805e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.735805e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.627354e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.629498e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.629498e+02 
) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.687951 sec +TOTAL : 0.697299 sec INFO: No Floating Point Exceptions have been reported - 1,861,508,781 cycles # 2.693 GHz - 4,273,150,888 instructions # 2.30 insn per cycle - 0.691882515 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) + 1,857,131,979 cycles # 2.651 GHz + 4,273,320,598 instructions # 2.30 insn per cycle + 0.701213399 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.772663e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.774875e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.774875e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.540089e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.542023e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.542023e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.785119 sec +TOTAL : 0.812936 sec INFO: No Floating Point Exceptions have been reported - 1,354,689,818 cycles # 1.718 GHz - 2,158,158,197 instructions # 1.59 insn per cycle - 0.789277425 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) + 1,360,618,833 cycles # 1.668 GHz + 2,159,125,772 instructions # 1.59 insn per cycle + 0.816997353 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982958280E-003 Relative difference = 2.0044092642523172e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 7278096934..6d352d97ac 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_09:22:33 +DATE: 2024-05-16_15:04:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.582019e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.583834e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.583834e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.582684e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.584567e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.584567e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.629368 sec +TOTAL : 1.633735 sec INFO: No Floating Point Exceptions have been reported - 5,438,501,558 cycles # 2.848 GHz - 11,384,430,806 instructions # 2.09 insn per cycle - 1.966478961 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge + 5,453,177,396 cycles # 2.847 GHz + 11,627,188,509 instructions # 2.13 insn per cycle + 1.971399165 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) @@ -70,7 +70,7 @@ WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -79,29 +79,29 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.332845e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.346794e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.346794e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.292389e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.306150e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.306150e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.897225 sec +TOTAL : 1.932378 sec INFO: No Floating Point Exceptions have been reported - 6,200,851,000 cycles # 2.852 GHz - 13,608,717,924 instructions # 2.19 insn per cycle - 2.231041436 seconds time elapsed + 6,313,737,946 cycles # 2.848 GHz + 13,568,150,990 instructions # 2.15 insn per cycle + 2.274068662 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.849636e-03 Avg ME (F77/GPU) = 9.8712405367667715E-003 Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -109,28 +109,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.468083e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.468339e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.468339e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.447529e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.447785e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.447785e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.240178 sec +TOTAL : 6.255519 sec INFO: No Floating Point Exceptions have been reported - 17,844,842,659 cycles # 2.859 GHz - 53,590,816,936 instructions # 3.00 insn per cycle - 6.244182701 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) + 17,871,844,477 cycles # 2.856 GHz + 53,590,423,890 instructions # 3.00 insn per cycle + 6.259496797 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 Avg ME (F77/C++) = 9.8479612087541066E-003 Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -138,28 +138,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.307909e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.308350e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.308350e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.319002e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.319403e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.319403e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.601295 sec +TOTAL : 1.595862 sec INFO: No Floating Point Exceptions have been reported - 4,582,592,604 cycles # 2.857 GHz - 13,762,912,115 instructions # 3.00 insn per cycle - 1.605688643 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) + 4,573,738,949 cycles # 2.860 GHz + 13,762,785,828 instructions # 3.01 insn per cycle + 1.599904345 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896527003E-003 Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -167,28 +167,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.657262e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.658867e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.658867e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.613525e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.615218e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.615218e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.798566 sec +TOTAL : 0.804350 sec INFO: No Floating Point Exceptions have been reported - 2,128,647,672 cycles # 2.655 GHz - 4,817,126,024 instructions # 2.26 insn per cycle - 0.802553686 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) + 2,139,167,872 cycles # 2.648 GHz + 4,817,111,626 instructions # 2.25 insn per cycle + 0.808508083 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -196,28 +196,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.573528e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.575580e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.575580e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.603124e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.605216e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.605216e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.702208 sec +TOTAL : 0.699524 sec INFO: No Floating Point Exceptions have been reported - 1,871,867,779 cycles # 2.653 GHz - 4,274,231,871 instructions # 2.28 insn per cycle - 0.706209772 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) + 1,862,402,974 cycles # 2.650 GHz + 4,274,167,467 instructions # 2.29 insn per cycle + 0.703628099 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -225,22 +225,22 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.572374e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.574488e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.574488e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.580110e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.582197e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.582197e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.808838 sec +TOTAL : 0.807828 sec INFO: No Floating Point Exceptions have been reported - 1,356,213,597 cycles # 1.670 GHz - 2,159,107,759 instructions # 1.59 insn per cycle - 0.812846809 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) + 1,354,037,726 cycles # 1.669 GHz + 2,159,114,420 instructions # 1.59 insn per cycle + 0.811949308 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982958280E-003 Relative difference = 2.0044092642523172e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 61cd36fc2c..2d3f5a3740 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_09:04:26 +DATE: 2024-05-16_14:46:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.545861e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.546657e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.547142e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.539024e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.539847e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.540280e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.688745 sec +TOTAL : 1.689103 sec INFO: No Floating Point Exceptions have been reported - 5,685,323,668 cycles # 2.881 GHz - 12,065,201,392 instructions # 2.12 insn per cycle - 2.032136430 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/gcheck.exe -p 1 256 1 + 5,627,474,622 cycles # 2.847 GHz + 11,923,534,222 instructions # 2.12 insn per cycle + 2.035228412 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.321948e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.322613e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.322750e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.303686e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.304329e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.304469e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.913475 sec +TOTAL : 1.932451 sec INFO: No Floating Point Exceptions have been reported - 6,335,955,396 cycles # 2.890 GHz - 13,040,721,092 instructions # 2.06 insn per cycle - 2.248864975 seconds time elapsed + 6,311,455,519 cycles # 2.848 GHz + 13,762,708,375 instructions # 2.18 insn per cycle + 2.272906437 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.849636e-03 Avg ME (F77/GPU) = 9.8712405367667715E-003 Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.627174e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.627430e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.627430e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.477402e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.477656e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.477656e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.124640 sec +TOTAL : 6.233047 sec INFO: No Floating Point Exceptions have been reported - 17,801,480,091 cycles # 2.905 GHz - 53,579,767,361 instructions # 3.01 insn per cycle - 6.128606412 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) + 17,803,580,317 cycles # 2.855 GHz + 53,580,069,164 instructions # 3.01 insn per cycle + 6.237030677 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 Avg ME (F77/C++) = 9.8479612087582491E-003 Relative difference = 2.1198118933954545e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.387094e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.387509e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.387509e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.307415e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.307805e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.307805e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.563998 sec +TOTAL : 1.601039 sec INFO: No Floating Point Exceptions have been reported - 4,549,922,930 cycles # 2.903 GHz - 13,755,140,352 instructions # 3.02 insn per cycle - 1.568011182 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) + 4,572,009,891 cycles # 2.850 GHz + 13,755,353,111 instructions # 3.01 insn per cycle + 1.605120576 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896225560E-003 Relative difference = 3.151694379513441e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.714186e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.715793e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.715793e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.585961e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.587683e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.587683e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.791730 sec +TOTAL : 0.806516 sec INFO: No Floating Point Exceptions have been reported - 2,142,665,364 cycles # 2.695 GHz - 4,818,205,802 instructions # 2.25 insn per cycle - 0.795892612 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) + 2,141,220,761 cycles # 2.644 GHz + 4,818,439,860 instructions # 2.25 insn per cycle + 0.810543510 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070967E-003 Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.551977e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.554036e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.554036e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.583228e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.585349e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
7.585349e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.704224 sec +TOTAL : 0.701072 sec INFO: No Floating Point Exceptions have been reported - 1,876,007,792 cycles # 2.651 GHz - 4,275,151,801 instructions # 2.28 insn per cycle - 0.708285391 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) + 1,870,651,613 cycles # 2.656 GHz + 4,275,203,774 instructions # 2.29 insn per cycle + 0.705038579 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070967E-003 Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.544217e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.546156e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.546156e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.570140e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.572065e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.572065e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.811936 sec +TOTAL : 0.808794 sec INFO: No Floating Point Exceptions have been reported - 1,361,211,310 cycles # 1.670 GHz - 2,164,522,586 instructions # 1.59 insn per cycle - 0.816005153 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3487) (512y: 34) (512z:79499) + 1,356,929,556 cycles # 1.671 GHz + 2,164,613,956 instructions # 1.60 insn per cycle + 0.812781092 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3487) (512y: 34) (512z:79499) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982955140E-003 Relative difference = 2.0044060904369713e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index d9b2e71fbd..dfab5870bc 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_09:05:05 +DATE: 2024-05-16_14:47:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.690095e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.690606e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.690910e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.689455e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.689959e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.690212e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.176133 sec +TOTAL : 2.179203 sec INFO: No Floating Point Exceptions have been reported - 7,139,703,316 cycles # 2.856 GHz - 15,654,967,363 instructions # 2.19 insn per cycle - 2.555531923 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/gcheck.exe -p 1 256 1 + 7,126,539,551 cycles # 2.849 GHz + 15,807,759,758 instructions # 2.22 insn per cycle + 2.559686036 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.111988e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.112256e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.112295e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.107889e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108154e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108192e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.411127 sec +TOTAL : 3.424856 sec INFO: No Floating Point Exceptions have been reported - 10,670,396,905 cycles # 2.855 GHz - 22,897,427,762 instructions # 2.15 insn per cycle - 3.795230746 seconds time elapsed + 10,729,635,772 cycles # 2.852 GHz + 25,204,058,412 instructions # 2.35 insn per cycle + 3.820430433 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722599015656498E-003 Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.341488e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.341701e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.341701e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.303415e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.303607e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.303607e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.183650 sec +TOTAL : 7.238072 sec INFO: No Floating Point Exceptions have been reported - 19,087,208,073 cycles # 2.656 GHz - 54,154,763,894 instructions # 2.84 insn per cycle - 7.187584076 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32067) (avx2: 0) (512y: 0) (512z: 0) + 19,150,406,884 cycles # 2.645 GHz + 54,154,394,762 instructions # 2.83 insn per cycle + 7.242308052 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32067) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.509804e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.509885e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.509885e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.497154e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.497235e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.497235e+02 
) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.502059 sec +TOTAL : 3.531757 sec INFO: No Floating Point Exceptions have been reported - 9,306,165,950 cycles # 2.655 GHz - 26,158,910,898 instructions # 2.81 insn per cycle - 3.506050842 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96007) (avx2: 0) (512y: 0) (512z: 0) + 9,343,938,644 cycles # 2.643 GHz + 26,158,830,842 instructions # 2.80 insn per cycle + 3.535758073 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96007) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.456453e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.456874e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.456874e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.453828e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.454275e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.454275e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.532803 sec +TOTAL : 1.534198 sec INFO: No Floating Point Exceptions have been reported - 4,070,475,260 cycles # 2.650 GHz - 9,228,220,154 instructions # 2.27 insn per cycle - 1.536759047 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) + 4,069,691,610 cycles # 2.648 GHz + 9,228,168,046 instructions # 2.27 insn per cycle + 1.538179495 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.023017e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.023619e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.023619e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.986335e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.986931e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.986931e+02 
) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.317421 sec +TOTAL : 1.329366 sec INFO: No Floating Point Exceptions have been reported - 3,502,203,854 cycles # 2.652 GHz - 8,174,654,623 instructions # 2.33 insn per cycle - 1.321520742 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) + 3,528,184,184 cycles # 2.647 GHz + 8,174,614,993 instructions # 2.32 insn per cycle + 1.333451918 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.376263e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.376781e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.376781e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.367967e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.368468e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.368468e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.569331 sec +TOTAL : 1.573046 sec INFO: No Floating Point Exceptions have been reported - 2,616,979,256 cycles # 1.664 GHz - 4,154,596,109 instructions # 1.59 insn per cycle - 1.573481593 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2046) (512y: 93) (512z:78760) + 2,618,946,865 cycles # 1.661 GHz + 4,154,480,374 instructions # 1.59 insn per cycle + 1.577054610 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2046) (512y: 93) (512z:78760) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 5a0d7870a2..3ddfb4805b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-15_09:05:57 +DATE: 2024-05-16_14:48:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.685558e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.686059e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.686349e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.679279e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.679786e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.680054e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.179217 sec +TOTAL : 2.178048 sec INFO: No Floating Point Exceptions have been reported - 7,132,247,686 cycles # 2.849 GHz - 16,033,584,026 instructions # 2.25 insn per cycle - 2.559487651 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/gcheck.exe -p 1 256 1 + 7,134,880,740 cycles # 2.851 GHz + 15,434,594,866 instructions # 2.16 insn per cycle + 2.558453633 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.107499e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.107770e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.107809e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.104221e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.104483e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.104525e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.414639 sec +TOTAL : 3.411600 sec INFO: No Floating Point Exceptions have been reported - 10,670,291,088 cycles # 2.854 GHz - 23,679,613,894 instructions # 2.22 insn per cycle - 3.796653222 seconds time elapsed + 10,672,973,002 cycles # 2.855 GHz + 24,521,846,399 instructions # 2.30 insn per cycle + 3.794724712 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722599015656498E-003 Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.443183e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.443381e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.443381e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.893217e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.893439e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.893439e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.087077 sec +TOTAL : 6.694276 sec INFO: No Floating Point Exceptions have been reported - 19,147,646,492 cycles # 2.701 GHz - 54,154,361,807 instructions # 2.83 insn per cycle - 7.091178221 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32244) (avx2: 0) (512y: 0) (512z: 0) + 19,121,414,788 cycles # 2.855 GHz + 54,156,458,090 instructions # 2.83 insn per cycle + 6.698138270 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32244) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.486367e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.486451e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.486451e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.495395e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.495480e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.495480e+02 
) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.556604 sec +TOTAL : 3.535528 sec INFO: No Floating Point Exceptions have been reported - 9,388,405,137 cycles # 2.638 GHz - 26,078,220,173 instructions # 2.78 insn per cycle - 3.560671471 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:95901) (avx2: 0) (512y: 0) (512z: 0) + 9,391,010,006 cycles # 2.654 GHz + 26,079,707,862 instructions # 2.78 insn per cycle + 3.539600596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95901) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.497483e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.497914e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497914e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.518532e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.518969e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.518969e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.514756 sec +TOTAL : 1.505616 sec INFO: No Floating Point Exceptions have been reported - 4,017,002,412 cycles # 2.646 GHz - 9,213,710,845 instructions # 2.29 insn per cycle - 1.518740707 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) + 4,001,150,405 cycles # 2.652 GHz + 9,212,868,850 instructions # 2.30 insn per cycle + 1.509560632 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.994066e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.994690e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.994690e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.985927e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.986486e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.986486e+02 
) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.326970 sec +TOTAL : 1.329709 sec INFO: No Floating Point Exceptions have been reported - 3,524,449,301 cycles # 2.649 GHz - 8,167,512,543 instructions # 2.32 insn per cycle - 1.330907157 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) + 3,529,740,112 cycles # 2.648 GHz + 8,168,252,869 instructions # 2.31 insn per cycle + 1.333651402 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check.exe -p 1 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.358102e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.358609e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.358609e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.359545e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.360045e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.360045e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.577322 sec +TOTAL : 1.576721 sec INFO: No Floating Point Exceptions have been reported - 2,627,871,884 cycles # 1.663 GHz - 4,154,099,949 instructions # 1.58 insn per cycle - 1.581349880 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1493) (512y: 175) (512z:78776) + 2,623,702,370 cycles # 1.660 GHz + 4,153,356,804 instructions # 1.58 insn per cycle + 1.580839869 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1493) (512y: 175) (512z:78776) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index bddb87b89c..38bc670a18 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_09:00:42 +DATE: 2024-05-16_14:42:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.909030e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.254339e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.559590e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.927387e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.315718e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.634653e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.451814 sec +TOTAL : 0.452451 sec INFO: No Floating Point Exceptions have been reported - 1,901,362,158 cycles # 2.851 GHz - 2,671,473,853 instructions # 1.41 insn per cycle - 0.724019133 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 + 1,879,085,625 cycles # 2.815 GHz + 2,632,406,951 instructions # 1.40 insn per cycle + 0.724903288 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.666389e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.186579e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.531944e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.675522e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.208336e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.557322e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.532226 sec +TOTAL : 0.534593 sec INFO: No Floating Point Exceptions have been reported - 2,173,433,084 cycles # 2.828 GHz - 3,153,637,703 instructions # 1.45 insn per cycle - 0.825328005 seconds time elapsed + 2,165,913,457 cycles # 2.812 GHz + 3,139,398,529 instructions # 1.45 insn per cycle + 0.827804422 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482467490466 Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.011847e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.033729e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.033729e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.011560e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033153e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033153e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.642681 sec +TOTAL : 1.642980 sec INFO: No Floating Point Exceptions have been reported - 4,720,170,815 cycles # 2.868 GHz - 13,462,481,531 instructions # 2.85 insn per cycle - 1.646920268 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) + 4,710,402,412 cycles # 2.861 GHz + 13,462,495,012 instructions # 2.86 insn per cycle + 1.647108070 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499484 Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.850569e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.921982e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.921982e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.839775e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910542e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910542e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.907708 sec +TOTAL : 0.913158 sec INFO: No Floating Point Exceptions have been reported - 2,621,073,603 cycles # 2.876 GHz - 7,551,555,914 instructions # 2.88 insn per cycle - 0.911980507 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) + 2,620,816,977 cycles # 2.859 GHz + 7,551,970,333 instructions # 2.88 insn per cycle + 0.917276709 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499478 Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.026606e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.230232e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.230232e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.970408e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.156692e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.156692e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.564761 sec +TOTAL : 0.574933 sec INFO: No Floating Point Exceptions have been reported - 1,486,098,299 cycles # 2.614 GHz - 3,119,399,678 instructions # 2.10 insn per cycle - 0.569234281 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) + 1,480,758,822 cycles # 2.560 GHz + 3,119,703,419 instructions # 2.11 insn per cycle + 0.579132992 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.466118e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.717374e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.717374e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.428889e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.682851e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.682851e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.495756 sec +TOTAL : 0.501020 sec INFO: No Floating Point Exceptions have been reported - 1,343,361,512 cycles # 2.691 GHz - 2,981,714,602 instructions # 2.22 insn per cycle - 0.500153355 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) + 1,347,520,276 cycles # 2.670 GHz + 2,981,434,055 instructions # 2.21 insn per cycle + 0.505363497 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.236724e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.347469e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.347469e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.241546e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347861e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347861e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.757688 sec +TOTAL : 0.756225 sec INFO: No Floating Point Exceptions have been reported - 1,336,802,616 cycles # 1.756 GHz - 1,953,160,303 instructions # 1.46 insn per cycle - 0.761809117 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) + 1,330,320,612 cycles # 1.751 GHz + 1,953,406,018 instructions # 1.47 insn per cycle + 0.760489864 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 1e67b43177..6f141963c0 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_09:20:13 +DATE: 2024-05-16_15:02:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.448925e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.124463e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.124463e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.428295e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.103056e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.103056e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.483255 sec +TOTAL : 0.480636 sec INFO: No Floating Point Exceptions have been reported - 1,956,343,524 cycles # 2.810 GHz - 2,913,177,590 instructions # 1.49 insn per cycle - 0.753489640 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge + 1,959,891,585 cycles # 2.818 GHz + 2,927,619,706 instructions # 1.49 insn per cycle + 0.752080667 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -70,7 +70,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -79,29 +79,29 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.145662e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.331613e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.331613e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.157968e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.371122e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.371122e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.764560 sec +TOTAL : 0.764001 sec INFO: No Floating Point Exceptions have been reported - 2,875,174,546 cycles # 2.829 GHz - 4,447,931,202 instructions # 1.55 insn per cycle - 1.073875112 seconds time elapsed + 2,873,640,599 cycles # 2.829 GHz + 4,407,079,803 instructions # 1.53 insn per cycle + 1.073816079 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482467490466 Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -109,28 +109,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.006409e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.028326e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.028326e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.008642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.030371e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.030371e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.659684 sec +TOTAL : 1.654853 sec INFO: No Floating Point Exceptions have been reported - 4,757,299,192 cycles # 2.860 GHz - 13,469,690,210 instructions # 2.83 insn per cycle - 1.664301921 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) + 4,747,034,662 cycles # 2.862 GHz + 13,469,694,473 instructions # 2.84 insn per cycle + 1.659302078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499484 Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -138,28 +138,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.826759e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.897996e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.897996e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.820348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.892211e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.892211e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.926560 sec +TOTAL : 0.930951 sec INFO: No Floating Point Exceptions have been reported - 2,658,383,528 cycles # 2.857 GHz - 7,600,215,451 instructions # 2.86 insn per cycle - 0.930908981 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) + 2,665,977,292 cycles # 2.852 GHz + 7,601,998,240 instructions # 2.85 insn per cycle + 0.935555380 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499478 Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -167,28 +167,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.079827e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.284873e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.284873e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.068620e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.272960e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.272960e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.562992 sec +TOTAL : 0.564348 sec INFO: No Floating Point Exceptions have been reported - 1,517,343,501 cycles # 2.677 GHz - 3,170,040,097 instructions # 2.09 insn per cycle - 0.567535406 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) + 1,513,664,570 cycles # 2.669 GHz + 3,168,463,518 instructions # 2.09 insn per cycle + 0.568761168 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -196,28 +196,28 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.421067e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.671215e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.671215e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.408389e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.655047e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.655047e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.509037 sec +TOTAL : 0.511104 sec INFO: No Floating Point Exceptions have been reported - 1,373,998,265 cycles # 2.680 GHz - 3,030,756,412 instructions # 2.21 insn per cycle - 0.513395811 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) + 1,377,582,779 cycles # 2.675 GHz + 3,030,644,125 instructions # 2.20 insn per cycle + 0.515560343 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -225,22 +225,22 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.223104e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.330253e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.330253e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.221799e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.329402e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.329402e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.769822 sec +TOTAL : 0.769820 sec INFO: No Floating Point Exceptions have been reported - 1,369,271,721 cycles # 1.771 GHz - 1,991,169,855 instructions # 1.45 insn per cycle - 0.774195853 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) + 1,366,102,927 cycles # 1.765 GHz + 1,991,071,116 instructions # 1.46 insn per cycle + 0.774386560 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 4aab0bf5ea..25b8d3c885 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_09:00:56 +DATE: 2024-05-16_14:43:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.898149e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.161627e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.461883e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.907170e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.197971e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.504611e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.453219 sec +TOTAL : 0.455771 sec INFO: No Floating Point Exceptions have been reported - 1,899,143,143 cycles # 2.817 GHz - 2,687,151,052 instructions # 1.41 insn per cycle - 0.730634084 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 + 1,881,865,516 cycles # 2.813 GHz + 2,669,782,801 instructions # 1.42 insn per cycle + 0.727786761 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.646029e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.108157e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.446695e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.641992e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.081273e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.416654e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.533523 sec +TOTAL : 0.532301 sec INFO: No Floating Point Exceptions have been reported - 2,165,769,421 cycles # 2.820 GHz - 3,118,817,395 instructions # 1.44 insn per cycle - 0.826557976 seconds time elapsed + 2,167,822,822 cycles # 2.823 GHz + 3,120,353,321 instructions # 1.44 insn per cycle + 0.825343283 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482467490466 Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.012188e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.033722e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.033722e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.007784e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.029112e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.029112e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.642236 sec +TOTAL : 1.648995 sec INFO: No Floating Point Exceptions have been reported - 4,733,263,322 cycles # 2.876 GHz - 13,456,928,369 instructions # 2.84 insn per cycle - 1.646542126 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) + 4,725,323,359 cycles # 2.860 GHz + 13,457,369,308 instructions # 2.85 insn per cycle + 1.653142214 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499484 Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.865658e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.935819e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.935819e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.833913e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.904030e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.904030e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.900438 sec +TOTAL : 0.915889 sec INFO: No Floating Point Exceptions have been reported - 2,628,911,017 cycles # 2.909 GHz - 7,551,755,138 instructions # 2.87 insn per cycle - 0.904636036 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) + 2,628,184,982 cycles # 2.858 GHz + 7,551,273,836 instructions # 2.87 insn per cycle + 0.920086997 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499478 Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.162085e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.369795e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.369795e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.116183e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.320457e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.320457e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.541035 sec +TOTAL : 0.548674 sec INFO: No Floating Point Exceptions have been reported - 1,478,892,065 cycles # 2.715 GHz - 3,118,063,669 instructions # 2.11 insn per cycle - 0.545344535 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) + 1,476,841,675 cycles # 2.675 GHz + 3,117,924,257 instructions # 2.11 insn per cycle + 0.552738607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.521390e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.773050e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.773050e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.456247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.706124e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.706124e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.487980 sec +TOTAL : 0.496887 sec INFO: No Floating Point Exceptions have been reported - 1,338,590,636 cycles # 2.723 GHz - 2,978,713,537 instructions # 2.23 insn per cycle - 0.492213180 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) + 1,340,057,166 cycles # 2.677 GHz + 2,978,732,248 instructions # 2.22 insn per cycle + 0.501058940 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.305225e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.413141e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.413141e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.241283e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347840e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347840e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.735631 sec +TOTAL : 0.757118 sec INFO: No Floating Point Exceptions have been reported - 1,327,963,112 cycles # 1.797 GHz - 1,951,639,234 instructions # 1.47 insn per cycle - 0.739806638 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) + 1,329,966,748 cycles # 1.749 GHz + 1,951,787,640 instructions # 1.47 insn per cycle + 0.761356492 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index c1da5435c4..88eaa7d80d 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_09:01:09 +DATE: 2024-05-16_14:43:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.475418e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.211242e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.342424e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.867335e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.223690e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.343650e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.444704 sec +TOTAL : 0.450195 sec INFO: No Floating Point Exceptions have been reported - 1,889,829,448 cycles # 2.849 GHz - 2,675,641,307 instructions # 1.42 insn per cycle - 0.720116124 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 + 1,886,543,936 cycles # 2.814 GHz + 2,627,629,254 instructions # 1.39 insn per cycle + 0.729554150 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.067311e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.829054e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.961375e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.183442e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.842494e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962990e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.479911 sec +TOTAL : 0.482910 sec INFO: No Floating Point Exceptions have been reported - 2,018,885,875 cycles # 2.854 GHz - 2,869,563,274 instructions # 1.42 insn per cycle - 0.764758987 seconds time elapsed + 1,994,696,147 cycles # 2.812 GHz + 2,828,466,882 instructions # 1.42 insn per cycle + 0.766894337 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424226e-01 Avg ME (F77/GPU) = 0.14247488790821983 Relative difference = 0.00036713209996037764 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.086530e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.111191e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.111191e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.069532e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.093791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.093791e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.529427 sec +TOTAL : 1.553490 sec INFO: No Floating Point Exceptions have been reported - 4,457,289,207 cycles # 2.908 GHz - 13,047,683,288 instructions # 2.93 insn per cycle - 1.533439591 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) + 4,455,366,971 cycles # 2.862 GHz + 13,047,769,817 instructions # 2.93 insn per cycle + 1.557630020 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246857540270419 Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.919635e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.106235e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.106235e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.869084e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.052765e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.052765e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.582181 sec +TOTAL : 0.592151 sec INFO: No Floating Point Exceptions have been reported - 1,698,976,531 cycles # 2.901 GHz - 4,511,883,636 instructions # 2.66 insn per cycle - 0.586192712 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) + 1,701,146,602 cycles # 2.856 GHz + 4,512,165,265 instructions # 2.65 insn per cycle + 0.596248693 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246859631675157 Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.693981e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.403408e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.403408e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.609679e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.315056e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.315056e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.308817 sec +TOTAL : 0.313413 sec INFO: No Floating Point Exceptions have been reported - 850,392,085 cycles # 2.723 GHz - 1,895,931,529 instructions # 2.23 insn per cycle - 0.312838116 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) + 850,737,642 cycles # 2.684 GHz + 1,895,945,890 instructions # 2.23 insn per cycle + 0.317546154 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.858267e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.647572e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.647572e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.973396e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.785303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.785303e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.301291 sec +TOTAL : 0.295859 sec INFO: No Floating Point Exceptions have been reported - 806,075,929 cycles # 2.644 GHz - 1,819,584,061 instructions # 2.26 insn per cycle - 0.305428041 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) + 801,819,935 cycles # 2.679 GHz + 1,819,229,849 instructions # 2.27 insn per cycle + 0.299944027 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.437479e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.882177e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.882177e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.354956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.770974e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.770974e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.392579 sec +TOTAL : 0.400076 sec INFO: No Floating Point Exceptions have been reported - 734,620,005 cycles # 1.854 GHz - 1,304,440,768 instructions # 1.78 insn per cycle - 0.396763024 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) + 733,009,701 cycles # 1.817 GHz + 1,304,250,799 instructions # 1.78 insn per cycle + 0.404216975 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489383243206 Relative difference = 4.32888033512879e-08 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 99a9bd4312..b62a8a0309 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -40,11 +40,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_09:20:27 +DATE: 2024-05-16_15:02:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,16 +53,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.331329e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.996965e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.996965e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.337579e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.030007e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.030007e+07 ) sec^-1 MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 -TOTAL : 0.463597 sec +TOTAL : 0.462122 sec INFO: No Floating Point Exceptions have been reported - 1,898,524,563 cycles # 2.813 GHz - 2,749,841,290 instructions # 1.45 insn per cycle - 0.733163352 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge + 1,901,719,201 cycles # 2.816 GHz + 2,811,032,752 instructions # 1.48 insn per cycle + 0.731978994 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -70,7 +70,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -79,29 +79,29 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.814076e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.499907e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.499907e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.907303e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.566216e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.566216e+07 ) sec^-1 MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 -TOTAL : 0.636307 sec +TOTAL : 0.637098 sec INFO: No Floating Point Exceptions have been reported - 2,461,375,134 cycles # 2.827 GHz - 3,720,831,531 instructions # 1.51 insn per cycle - 0.927666351 seconds time elapsed + 2,459,040,544 cycles # 2.824 GHz + 3,715,271,980 instructions # 1.51 insn per cycle + 0.927773682 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424226e-01 Avg ME (F77/GPU) = 0.14247488790821983 Relative difference = 0.00036713209996037764 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -109,28 +109,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.067015e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.091473e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.091473e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.068152e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092782e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092782e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.560902 sec +TOTAL : 1.559403 sec INFO: No Floating Point Exceptions have been reported - 4,476,631,631 cycles # 2.861 GHz - 13,052,576,283 instructions # 2.92 insn per cycle - 1.565284133 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) + 4,475,912,555 cycles # 2.864 GHz + 13,052,235,712 instructions # 2.92 insn per cycle + 1.563691095 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246857540270419 Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -138,28 +138,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.849727e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.036356e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.036356e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.856394e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.039884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.039884e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.601117 sec +TOTAL : 0.599599 sec INFO: No Floating Point Exceptions have been reported - 1,725,235,767 cycles # 2.853 GHz - 4,560,189,479 instructions # 2.64 insn per cycle - 0.605343025 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) + 1,723,185,860 cycles # 2.856 GHz + 4,560,285,596 instructions # 2.65 insn per cycle + 0.603925442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246859631675157 Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -167,28 +167,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.549970e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.241462e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.241462e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.545801e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.241062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.241062e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.320675 sec +TOTAL : 0.321141 sec INFO: No Floating Point Exceptions have been reported - 870,683,283 cycles # 2.684 GHz - 1,932,963,570 instructions # 2.22 insn per cycle - 0.324927313 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) + 871,513,310 cycles # 2.683 GHz + 1,932,959,243 instructions # 2.22 insn per cycle + 0.325378385 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -196,28 +196,28 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.928578e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.731935e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.731935e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.891127e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.696072e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.696072e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.302147 sec +TOTAL : 0.304221 sec INFO: No Floating Point Exceptions have been reported - 823,010,972 cycles # 2.692 GHz - 1,856,018,050 instructions # 2.26 insn per cycle - 0.306433740 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) + 825,995,486 cycles # 2.683 GHz + 1,856,161,781 instructions # 2.25 insn per cycle + 0.308416114 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] @@ -225,22 +225,22 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.308589e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.721286e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.721286e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.307702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.720545e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.720545e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.408402 sec +TOTAL : 0.408443 sec INFO: No Floating Point Exceptions have been reported - 755,565,484 cycles # 1.834 GHz - 1,346,064,637 instructions # 1.78 insn per cycle - 0.412615974 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) + 755,445,387 cycles # 1.833 GHz + 1,345,989,570 instructions # 1.78 insn per cycle + 0.412779323 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489383243206 Relative difference = 4.32888033512879e-08 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index ee7754c1e5..f782cd39a5 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_09:01:21 +DATE: 2024-05-16_14:43:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.619671e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.171347e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.321938e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.882997e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.225822e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.344729e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.449334 sec +TOTAL : 0.447687 sec INFO: No Floating Point Exceptions have been reported - 1,911,458,979 cycles # 2.837 GHz - 2,678,993,367 instructions # 1.40 insn per cycle - 0.731911481 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 + 1,891,564,072 cycles # 2.816 GHz + 2,660,739,786 instructions # 1.41 insn per cycle + 0.729746219 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.999796e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.792626e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.907811e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.107850e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.805980e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.921999e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.480185 sec +TOTAL : 0.485790 sec INFO: No Floating Point Exceptions have been reported - 2,019,442,505 cycles # 2.856 GHz - 2,899,181,374 instructions # 1.44 insn per cycle - 0.764032904 seconds time elapsed + 1,996,906,378 cycles # 2.807 GHz + 2,867,667,096 instructions # 1.44 insn per cycle + 0.769333150 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424226e-01 Avg ME (F77/GPU) = 0.14247488790821983 Relative difference = 0.00036713209996037764 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.071947e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.096218e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.096218e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.069812e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.094168e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.094168e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.550896 sec +TOTAL : 1.553096 sec INFO: No Floating Point Exceptions have been reported - 4,453,962,103 cycles # 2.867 GHz - 13,028,729,940 instructions # 2.93 insn per cycle - 1.555070395 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) + 4,454,505,799 cycles # 2.862 GHz + 13,029,391,838 instructions # 2.92 insn per cycle + 1.557292510 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246857540270419 Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.922555e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.112075e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.112075e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.876347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.060596e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.060596e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.581270 sec +TOTAL : 0.590255 sec INFO: No Floating Point Exceptions have been reported - 1,695,848,596 cycles # 2.898 GHz - 4,508,366,642 instructions # 2.66 insn per cycle - 0.585397485 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3588) (avx2: 0) (512y: 0) (512z: 0) + 1,693,495,983 cycles # 2.852 GHz + 4,508,141,451 instructions # 2.66 insn per cycle + 0.594398488 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3588) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246859631675157 Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.713904e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.428103e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.428103e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.574680e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.273652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.273652e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.307920 sec +TOTAL : 0.315080 sec INFO: No Floating Point Exceptions have been reported - 851,602,918 cycles # 2.734 GHz - 1,892,990,089 instructions # 2.22 insn per cycle - 0.312008999 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) + 851,359,645 cycles # 2.672 GHz + 1,893,112,803 instructions # 2.22 insn per cycle + 0.319204462 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.076914e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.899685e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.899685e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.978403e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.785893e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.785893e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.290714 sec +TOTAL : 0.295093 sec INFO: No Floating Point Exceptions have been reported - 799,236,939 cycles # 2.715 GHz - 1,815,219,617 instructions # 2.27 insn per cycle - 0.294888319 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) + 799,712,323 cycles # 2.678 GHz + 1,814,979,638 instructions # 2.27 insn per cycle + 0.299228201 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.454618e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.885329e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.885329e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.317992e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.737735e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.737735e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.390931 sec +TOTAL : 0.402763 sec INFO: No Floating Point Exceptions have been reported - 736,041,895 cycles # 1.866 GHz - 1,302,074,830 instructions # 1.77 insn per cycle - 0.395025185 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1936) (512y: 32) (512z: 2382) + 736,511,578 cycles # 1.812 GHz + 1,302,115,541 instructions # 1.77 insn per cycle + 0.406867415 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1936) (512y: 32) (512z: 2382) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489383243206 Relative difference = 4.32888033512879e-08 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 1fa62c82ef..265a4a7626 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_09:01:33 +DATE: 2024-05-16_14:43:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.864147e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.385266e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.725274e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.940149e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.336219e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.662963e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.450470 sec +TOTAL : 0.452840 sec INFO: No Floating Point Exceptions have been reported - 1,909,070,340 cycles # 2.862 GHz - 2,709,311,513 instructions # 1.42 insn per cycle - 0.723332111 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 + 1,880,363,198 cycles # 2.808 GHz + 2,677,692,820 instructions # 1.42 insn per cycle + 0.726161506 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.665623e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.221672e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.571452e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.684159e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.236315e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.588311e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.527783 sec +TOTAL : 0.530728 sec INFO: No Floating Point Exceptions have been reported - 2,213,027,924 cycles # 2.867 GHz - 3,201,104,338 instructions # 1.45 insn per cycle - 0.828647636 seconds time elapsed + 2,164,642,485 cycles # 2.821 GHz + 3,145,530,012 instructions # 1.45 insn per cycle + 0.824333778 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482577104625 Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.016159e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.037746e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.037746e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.003476e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.024445e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.024445e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.635594 sec +TOTAL : 1.655769 sec INFO: No Floating Point Exceptions have been reported - 4,753,331,270 cycles # 2.901 GHz - 13,466,539,270 instructions # 2.83 insn per cycle - 1.639815517 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) + 4,745,491,139 cycles # 2.860 GHz + 13,466,039,366 instructions # 2.84 insn per cycle + 1.659848552 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.881999e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.954937e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.954937e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.849332e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.920343e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.920343e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.893130 sec +TOTAL : 0.908133 sec INFO: No Floating Point Exceptions have been reported - 2,606,846,984 cycles # 2.907 GHz - 7,384,622,562 instructions # 2.83 insn per cycle - 0.897377244 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) + 2,605,721,632 cycles # 2.858 GHz + 7,384,650,569 instructions # 2.83 insn per cycle + 0.912227813 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.203164e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.414319e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.414319e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.133010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.340359e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340359e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.534464 sec +TOTAL : 0.546033 sec INFO: No Floating Point Exceptions have been reported - 1,468,689,876 cycles # 2.729 GHz - 3,055,369,851 instructions # 2.08 insn per cycle - 0.538802998 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) + 1,469,888,298 cycles # 2.674 GHz + 3,055,461,884 instructions # 2.08 insn per cycle + 0.550169150 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.611648e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.879569e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.879569e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.544324e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.807645e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.807645e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.476482 sec +TOTAL : 0.485213 sec INFO: No Floating Point Exceptions have been reported - 1,311,176,707 cycles # 2.731 GHz - 2,929,976,202 instructions # 2.23 insn per cycle - 0.480656090 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) + 1,307,959,720 cycles # 2.676 GHz + 2,930,377,532 instructions # 2.24 insn per cycle + 0.489382978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.242556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.345674e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.345674e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.172350e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.272043e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.272043e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.755974 sec +TOTAL : 0.779592 sec INFO: No Floating Point Exceptions have been reported - 1,368,037,439 cycles # 1.801 GHz - 1,969,314,734 instructions # 1.44 insn per cycle - 0.760192026 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) + 1,368,592,699 cycles # 1.747 GHz + 1,969,378,714 instructions # 1.44 insn per cycle + 0.783958712 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 08bb5f814d..84e80111cc 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-15_09:01:47 +DATE: 2024-05-16_14:43:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.786922e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.093990e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.397156e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.890956e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.181054e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.513059e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.451157 sec +TOTAL : 0.453973 sec INFO: No Floating Point Exceptions have been reported - 1,903,287,247 cycles # 2.854 GHz - 2,701,712,617 instructions # 1.42 insn per cycle - 0.723485810 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 + 1,876,167,670 cycles # 2.808 GHz + 2,662,885,558 instructions # 1.42 insn per cycle + 0.726739496 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.631468e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.076387e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.412421e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.642147e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.081360e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.416296e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.533478 sec +TOTAL : 0.533908 sec INFO: No Floating Point Exceptions have been reported - 2,199,085,341 cycles # 2.859 GHz - 3,170,238,292 instructions # 1.44 insn per cycle - 0.827674110 seconds time elapsed + 2,163,893,097 cycles # 2.818 GHz + 3,132,561,280 instructions # 1.45 insn per cycle + 0.826852700 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482577104625 Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.015955e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.037432e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.037432e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.007176e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.028375e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.028375e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.635549 sec +TOTAL : 1.650928 sec INFO: No Floating Point Exceptions have been reported - 4,738,698,726 cycles # 2.891 GHz - 13,451,375,314 instructions # 2.84 insn per cycle - 1.639835424 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) + 4,733,031,285 cycles # 2.861 GHz + 13,451,191,160 instructions # 2.84 insn per cycle + 1.655053441 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.861825e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.933431e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.933431e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.847760e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.919370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.919370e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.902515 sec +TOTAL : 0.909156 sec INFO: No Floating Point Exceptions have been reported - 2,609,184,588 cycles # 2.879 GHz - 7,388,302,466 instructions # 2.83 insn per cycle - 0.906730854 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) + 2,606,818,939 cycles # 2.857 GHz + 7,388,977,556 instructions # 2.83 insn per cycle + 0.913243210 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.186143e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.397015e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.397015e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.915489e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.093943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.093943e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.536785 sec +TOTAL : 0.585235 sec INFO: No Floating Point Exceptions have been reported - 1,467,516,245 cycles # 2.716 GHz - 3,055,002,540 instructions # 2.08 insn per cycle - 0.540922505 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) + 1,469,957,671 cycles # 2.496 GHz + 3,055,084,256 instructions # 2.08 insn per cycle + 0.589443028 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.616442e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.883140e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.883140e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.535422e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.797003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.797003e+05 ) sec^-1 
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.475861 sec +TOTAL : 0.486063 sec INFO: No Floating Point Exceptions have been reported - 1,308,292,499 cycles # 2.728 GHz - 2,930,815,465 instructions # 2.24 insn per cycle - 0.480084547 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) + 1,306,700,125 cycles # 2.669 GHz + 2,930,583,524 instructions # 2.24 insn per cycle + 0.490171496 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.255482e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.357706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.357706e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.173668e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.273111e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.273111e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.751345 sec +TOTAL : 0.778991 sec INFO: No Floating Point Exceptions have been reported - 1,365,251,936 cycles # 1.808 GHz - 1,969,248,646 instructions # 1.44 insn per cycle - 0.755562444 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) + 1,367,910,665 cycles # 1.749 GHz + 1,969,371,455 instructions # 1.44 insn per cycle + 0.783143035 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 2de5598db8..8af6873425 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_09:38:14 +DATE: 2024-05-16_15:20:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.605271e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.089863e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.185959e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.588343e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.081541e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.176224e+08 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.529511 sec +TOTAL : 0.528808 sec INFO: No Floating Point Exceptions have been reported - 2,190,461,080 cycles # 2.819 GHz - 3,136,910,301 instructions # 1.43 insn per cycle - 0.833966524 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,192,111,166 cycles # 2.821 GHz + 3,135,008,318 instructions # 1.43 insn per cycle + 0.833908791 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 226 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134710926110280 Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.866601e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.916510e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.916510e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.865233e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.915227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.915227e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 5.729391 sec +TOTAL : 5.734356 sec INFO: No Floating Point Exceptions have been reported - 16,430,554,735 cycles # 2.866 GHz - 42,487,606,064 instructions # 2.59 insn per cycle - 5.734791443 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 711) (avx2: 0) (512y: 0) (512z: 0) + 16,430,057,220 cycles # 2.863 GHz + 42,484,854,801 instructions # 2.59 insn per cycle + 5.739849036 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 711) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.252768e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.421849e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.421849e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.235376e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.401567e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.401567e+05 ) 
sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.335881 sec +TOTAL : 3.353450 sec INFO: No Floating Point Exceptions have been reported - 9,601,506,382 cycles # 2.874 GHz - 26,317,441,050 instructions # 2.74 insn per cycle - 3.341476773 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2388) (avx2: 0) (512y: 0) (512z: 0) + 9,612,345,009 cycles # 2.863 GHz + 26,317,248,003 instructions # 2.74 insn per cycle + 3.358813940 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2388) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.343541e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.785141e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.785141e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.244474e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.678972e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.678972e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.074426 sec +TOTAL : 2.111219 sec INFO: No Floating Point Exceptions have been reported - 5,659,942,551 cycles # 2.723 GHz - 12,027,441,307 instructions # 2.13 insn per cycle - 2.080055501 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2532) (512y: 0) (512z: 0) + 5,673,148,574 cycles # 2.682 GHz + 12,029,125,150 instructions # 2.12 insn per cycle + 2.116589548 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2532) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.899321e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.433990e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.433990e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.759844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.282682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.282682e+05 ) 
sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 1.887685 sec +TOTAL : 1.934603 sec INFO: No Floating Point Exceptions have been reported - 5,183,911,111 cycles # 2.739 GHz - 11,155,246,112 instructions # 2.15 insn per cycle - 1.893248416 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2195) (512y: 148) (512z: 0) + 5,185,525,755 cycles # 2.675 GHz + 11,158,849,555 instructions # 2.15 insn per cycle + 1.940086470 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2195) (512y: 148) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.585962e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.778956e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.778956e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.492671e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.676216e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.676216e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.035606 sec +TOTAL : 3.114794 sec INFO: No Floating Point Exceptions have been reported - 5,553,509,524 cycles # 1.827 GHz - 8,071,821,721 instructions # 1.45 insn per cycle - 3.041168439 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1471) (512y: 129) (512z: 1684) + 5,530,850,143 cycles # 1.773 GHz + 8,071,834,418 instructions # 1.46 insn per cycle + 3.120392658 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1471) (512y: 129) (512z: 1684) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 0e03f4fe66..746b04ecac 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_09:38:39 +DATE: 2024-05-16_15:20:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.605793e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092662e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.188361e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.594523e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092654e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188255e+08 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.527275 sec +TOTAL : 0.533303 sec INFO: No Floating Point Exceptions have been reported - 2,209,654,097 cycles # 2.866 GHz - 3,153,659,564 instructions # 1.43 insn per cycle - 0.829664465 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,159,610,833 cycles # 2.816 GHz + 3,095,961,302 instructions # 1.43 insn per cycle + 0.825364511 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134710926110280 Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.917932e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.970345e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.970345e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.884407e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.935333e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935333e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 5.578442 sec +TOTAL : 5.676327 sec INFO: No Floating Point Exceptions have been reported - 16,270,300,978 cycles # 2.915 GHz - 43,269,542,953 instructions # 2.66 insn per cycle - 5.583862261 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 662) (avx2: 0) (512y: 0) (512z: 0) + 16,262,813,557 cycles # 2.863 GHz + 43,266,807,177 instructions # 2.66 insn per cycle + 5.681729392 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.295405e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.467767e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.467767e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.290556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.463505e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.463505e+05 ) 
sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.294249 sec +TOTAL : 3.298765 sec INFO: No Floating Point Exceptions have been reported - 9,439,488,737 cycles # 2.862 GHz - 25,429,298,825 instructions # 2.69 insn per cycle - 3.299556098 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2268) (avx2: 0) (512y: 0) (512z: 0) + 9,454,937,516 cycles # 2.862 GHz + 25,430,832,847 instructions # 2.69 insn per cycle + 3.304226277 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2268) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.717381e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.065402e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.065402e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.695348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.042916e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.042916e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.334499 sec +TOTAL : 2.345615 sec INFO: No Floating Point Exceptions have been reported - 6,268,270,664 cycles # 2.680 GHz - 13,639,651,270 instructions # 2.18 insn per cycle - 2.339998334 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2629) (512y: 0) (512z: 0) + 6,296,882,273 cycles # 2.679 GHz + 13,638,682,807 instructions # 2.17 insn per cycle + 2.351107442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2629) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.896625e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.269348e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.269348e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.910957e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.286382e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.286382e+05 ) 
sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.253497 sec +TOTAL : 2.246822 sec INFO: No Floating Point Exceptions have been reported - 6,047,001,238 cycles # 2.678 GHz - 12,723,590,718 instructions # 2.10 insn per cycle - 2.258908112 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 296) (512z: 0) + 6,026,491,701 cycles # 2.677 GHz + 12,722,860,113 instructions # 2.11 insn per cycle + 2.252413644 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2146) (512y: 296) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.435146e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.613567e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.613567e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.420299e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.596534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.596534e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.164665 sec +TOTAL : 3.177504 sec INFO: No Floating Point Exceptions have been reported - 5,622,890,350 cycles # 1.774 GHz - 8,925,668,868 instructions # 1.59 insn per cycle - 3.170077466 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1357) (512y: 171) (512z: 1777) + 5,627,100,070 cycles # 1.769 GHz + 8,928,441,764 instructions # 1.59 insn per cycle + 3.183062200 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1357) (512y: 171) (512z: 1777) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 6bdff93d6b..a9079e9716 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -40,183 +40,183 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_09:39:04 +DATE: 2024-05-16_15:21:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.598682e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.496542e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.761806e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.566221e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.504693e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.775023e+08 ) sec^-1 MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.485583 sec +TOTAL : 0.485925 sec INFO: No Floating Point Exceptions have been reported - 2,004,894,430 cycles # 2.817 GHz - 2,895,867,287 instructions # 1.44 insn per cycle - 0.768790632 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,003,287,538 cycles # 2.816 GHz + 2,880,414,118 instructions # 1.44 insn per cycle + 0.769648039 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313490e+00 Avg ME (F77/GPU) = 4.3136695463908836 Relative difference = 4.162439020000051e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.971680e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.029242e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.029242e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.938364e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.994818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.994818e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 5.407539 sec +TOTAL : 5.499884 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 15,732,291,805 cycles # 2.908 GHz - 42,223,773,670 instructions # 2.68 insn per cycle - 5.413095010 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 601) (avx2: 0) (512y: 0) (512z: 0) + 15,743,516,639 cycles # 2.861 GHz + 42,225,863,593 instructions # 2.68 insn per cycle + 5.505101290 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 Avg ME (F77/C++) = 4.3135739049175754 Relative difference = 2.2042608890083832e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.576683e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.925395e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.925395e+05 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.494085e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.834702e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.834702e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.380795 sec +TOTAL : 2.423560 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,946,843,290 cycles # 2.912 GHz - 16,919,386,922 instructions # 2.44 insn per cycle - 2.386268346 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2983) (avx2: 0) (512y: 0) (512z: 0) + 6,948,197,620 cycles # 2.861 GHz + 16,919,710,710 instructions # 2.44 insn per cycle + 2.428887408 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2983) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 Avg ME (F77/C++) = 4.3135722205042839 Relative difference = 5.111872113533787e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.963539e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.006176e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.006176e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 7.820914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.816967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.816967e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.404224 sec +TOTAL : 1.429543 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,861,367,030 cycles # 2.741 GHz - 7,989,911,144 instructions # 2.07 insn per cycle - 1.409406388 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 0) + 3,855,960,900 cycles # 2.689 GHz + 7,989,689,028 instructions # 2.07 insn per cycle + 1.434693752 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 Avg ME (F77/C++) = 4.3135645699221641 Relative difference = 9.97035713074993e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.421817e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.587592e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.587592e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 8.282128e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.407558e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.407558e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.332462 sec +TOTAL : 1.355217 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,663,720,767 cycles # 2.741 GHz - 7,492,536,355 instructions # 2.05 insn per cycle - 1.337531859 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3036) (512y: 23) (512z: 0) + 3,662,603,190 cycles # 2.693 GHz + 7,491,885,625 instructions # 2.05 insn per cycle + 1.360533114 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3036) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 Avg ME (F77/C++) = 4.3135645699221641 Relative difference = 9.97035713074993e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.252793e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.863234e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.863234e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 6.072932e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.653576e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.653576e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.765566 sec +TOTAL : 1.816585 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,314,814,363 cycles # 1.873 GHz - 5,987,731,552 instructions # 1.81 insn per cycle - 1.770605498 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2418) (512y: 32) (512z: 2031) + 3,322,287,385 cycles # 1.825 GHz + 5,988,754,595 instructions # 1.80 insn per cycle + 1.821834164 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2418) (512y: 32) (512z: 2031) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 7ffa995122..0359df7b77 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -40,183 +40,183 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_09:39:24 +DATE: 2024-05-16_15:21:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.587420e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.516829e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.786326e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.575897e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.505600e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.778243e+08 ) sec^-1 MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.483303 sec +TOTAL : 0.488373 sec INFO: No Floating Point Exceptions have been reported - 2,019,688,314 cycles # 2.847 GHz - 2,894,885,573 instructions # 1.43 insn per cycle - 0.766503916 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,007,752,645 cycles # 2.812 GHz + 2,828,437,251 instructions # 1.41 insn per cycle + 0.772837040 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313490e+00 Avg ME (F77/GPU) = 4.3136695463908836 Relative difference = 4.162439020000051e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.023013e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.083107e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.083107e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.991117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.050649e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.050649e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 5.272574 sec +TOTAL : 5.356246 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 15,334,092,326 cycles # 2.907 GHz - 42,471,073,635 instructions # 2.77 insn per cycle - 5.277803364 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 559) (avx2: 0) (512y: 0) (512z: 0) + 15,339,535,429 cycles # 2.862 GHz + 42,474,905,629 instructions # 2.77 insn per cycle + 5.361339903 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 559) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 Avg ME (F77/C++) = 4.3135739491553977 Relative difference = 1.1787117204016727e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.212204e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.665805e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.665805e+05 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.134209e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.583662e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.583662e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.100750 sec +TOTAL : 2.132369 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,119,971,132 cycles # 2.907 GHz - 16,261,719,943 instructions # 2.66 insn per cycle - 2.106003651 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2702) (avx2: 0) (512y: 0) (512z: 0) + 6,119,263,046 cycles # 2.864 GHz + 16,261,701,502 instructions # 2.66 insn per cycle + 2.137647028 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2702) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 Avg ME (F77/C++) = 4.3135722205042839 Relative difference = 5.111872113533787e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.593790e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.292755e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.292755e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 6.498649e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.173623e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.173623e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.679487 sec +TOTAL : 1.703269 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,592,127,675 cycles # 2.728 GHz - 9,041,761,961 instructions # 1.97 insn per cycle - 1.684724917 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3558) (512y: 0) (512z: 0) + 4,581,699,390 cycles # 2.683 GHz + 9,041,394,873 instructions # 1.97 insn per cycle + 1.708700782 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3558) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 Avg ME (F77/C++) = 4.3135645687580109 Relative difference = 9.997345323075056e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.878026e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.633469e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.633469e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 6.705142e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.424759e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.424759e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.612554 sec +TOTAL : 1.652652 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,407,162,057 cycles # 2.725 GHz - 8,532,871,968 instructions # 1.94 insn per cycle - 1.617874278 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3311) (512y: 10) (512z: 0) + 4,411,023,052 cycles # 2.662 GHz + 8,532,140,610 instructions # 1.93 insn per cycle + 1.658018216 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3311) (512y: 10) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 Avg ME (F77/C++) = 4.3135645687580109 Relative difference = 9.997345323075056e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.270771e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.879084e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.879084e+05 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.118773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.709641e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.709641e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.760317 sec +TOTAL : 1.803301 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,305,853,448 cycles # 1.874 GHz - 5,956,992,727 instructions # 1.80 insn per cycle - 1.765293566 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2357) (512y: 32) (512z: 2014) + 3,302,699,675 cycles # 1.827 GHz + 5,958,419,273 instructions # 1.80 insn per cycle + 1.808538430 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 32) (512z: 2014) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 80813ef825..4345b3c851 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -40,183 +40,183 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_09:39:45 +DATE: 2024-05-16_15:22:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.601305e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.088563e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.185234e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.596790e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.087710e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.182609e+08 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.525521 sec +TOTAL : 0.531791 sec INFO: No Floating Point Exceptions have been reported - 2,168,258,905 cycles # 2.853 GHz - 3,138,873,540 instructions # 1.45 insn per cycle - 0.816588080 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,158,479,665 cycles # 2.816 GHz + 3,115,947,911 instructions # 1.44 insn per cycle + 0.824595914 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 226 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.766882e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.810773e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.810773e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.739846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.783362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.783362e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.045671 sec +TOTAL : 6.139062 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 17,574,296,306 cycles # 2.905 GHz - 41,766,230,605 instructions # 2.38 insn per cycle - 6.051094127 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) + 17,579,172,412 cycles # 2.862 GHz + 41,767,715,738 instructions # 2.38 insn per cycle + 6.144566394 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.044146e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.188152e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.188152e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.944235e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.080846e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.080846e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.555742 sec +TOTAL : 3.674671 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 10,147,045,588 cycles # 2.850 GHz - 26,353,270,775 instructions # 2.60 insn per cycle - 3.561135432 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2438) (avx2: 0) (512y: 0) (512z: 0) + 10,157,870,701 cycles # 2.761 GHz + 26,355,211,403 instructions # 2.59 insn per cycle + 3.680088821 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2438) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.562224e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.889108e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.889108e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 4.512494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.830362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.830362e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.410975 sec +TOTAL : 2.435332 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,505,852,527 cycles # 2.694 GHz - 12,121,176,067 instructions # 1.86 insn per cycle - 2.416418980 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2718) (512y: 0) (512z: 0) + 6,512,604,303 cycles # 2.669 GHz + 12,120,159,732 instructions # 1.86 insn per cycle + 2.440902409 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2718) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.021202e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.409456e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.409456e+05 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.920988e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.300442e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.300442e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.199331 sec +TOTAL : 2.244169 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,010,625,125 cycles # 2.728 GHz - 11,226,506,585 instructions # 1.87 insn per cycle - 2.204814427 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2369) (512y: 150) (512z: 0) + 6,018,583,564 cycles # 2.676 GHz + 11,228,279,694 instructions # 1.87 insn per cycle + 2.249711111 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2369) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.252088e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.409014e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.409014e+05 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.148571e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.297302e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.297302e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.335937 sec +TOTAL : 3.442171 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,078,181,761 cycles # 1.820 GHz - 8,214,656,148 instructions # 1.35 insn per cycle - 3.341517301 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1787) (512y: 134) (512z: 1755) + 6,072,730,798 cycles # 1.762 GHz + 8,215,005,190 instructions # 1.35 insn per cycle + 3.447734816 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1787) (512y: 134) (512z: 1755) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index 58967d5980..fc67fec042 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -40,183 +40,183 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-15_09:40:11 +DATE: 2024-05-16_15:22:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.609019e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.095172e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.191696e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.615689e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.096145e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.193163e+08 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.528086 sec +TOTAL : 0.527662 sec INFO: No Floating Point Exceptions have been reported - 2,169,297,309 cycles # 2.846 GHz - 3,090,623,102 instructions # 1.42 insn per cycle - 0.819052248 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,187,091,067 cycles # 2.822 GHz + 3,143,599,790 instructions # 1.44 insn per cycle + 0.831715891 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.775858e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.820247e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.820247e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.750132e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.794255e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.794255e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.014989 sec +TOTAL : 6.103500 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 17,468,050,475 cycles # 2.902 GHz - 43,049,777,107 instructions # 2.46 insn per cycle - 6.020366459 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 651) (avx2: 0) (512y: 0) (512z: 0) + 17,473,867,626 cycles # 2.861 GHz + 43,052,630,037 instructions # 2.46 insn per cycle + 6.108967949 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 651) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.202203e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.362005e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.362005e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 3.176372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.336517e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.336517e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.384891 sec +TOTAL : 3.414423 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,778,733,081 cycles # 2.885 GHz - 25,167,358,615 instructions # 2.57 insn per cycle - 3.390286796 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2276) (avx2: 0) (512y: 0) (512z: 0) + 9,783,940,024 cycles # 2.862 GHz + 25,167,910,576 instructions # 2.57 insn per cycle + 3.420037518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.282641e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.561981e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.561981e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 4.178030e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.451835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.451835e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.558618 sec +TOTAL : 2.622185 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,005,125,692 cycles # 2.733 GHz - 12,790,366,975 instructions # 1.83 insn per cycle - 2.564023768 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2699) (512y: 0) (512z: 0) + 7,019,924,583 cycles # 2.672 GHz + 12,790,606,448 instructions # 1.82 insn per cycle + 2.627804246 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2699) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.605710e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.928534e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.928534e+05 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.488078e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.801083e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.801083e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.386989 sec +TOTAL : 2.447720 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,538,720,575 cycles # 2.734 GHz - 12,108,614,885 instructions # 1.85 insn per cycle - 2.392368081 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2351) (512y: 227) (512z: 0) + 6,546,937,322 cycles # 2.670 GHz + 12,109,881,739 instructions # 1.85 insn per cycle + 2.453162643 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2351) (512y: 227) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.076149e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.216751e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.216751e+05 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.983756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.117708e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.117708e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.520703 sec +TOTAL : 3.627336 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,385,528,551 cycles # 1.811 GHz - 8,984,473,699 instructions # 1.41 insn per cycle - 3.526309284 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1892) (512y: 178) (512z: 2083) + 6,377,996,877 cycles # 1.756 GHz + 8,984,744,450 instructions # 1.41 insn per cycle + 3.632964633 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1892) (512y: 178) (512z: 2083) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index bc7d09f4c2..f2a95b68c4 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_09:37:13 +DATE: 2024-05-16_15:19:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.202067e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.226429e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.231093e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.205899e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.229515e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.233614e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.468626 sec +TOTAL : 0.467479 sec INFO: No Floating Point Exceptions have been reported - 1,953,008,185 cycles # 2.841 GHz - 2,842,111,675 instructions # 1.46 insn per cycle - 0.746379585 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 1 256 1 + 1,929,394,895 cycles # 2.809 GHz + 2,774,653,842 instructions # 1.44 insn per cycle + 0.745241861 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.845433e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.983223e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.992360e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.854750e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.994181e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.003911e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.483498 sec +TOTAL : 0.485365 sec INFO: No Floating Point Exceptions have been reported - 2,009,105,725 cycles # 2.845 GHz - 2,994,395,706 instructions # 1.49 insn per cycle - 0.764235106 seconds time elapsed + 1,990,830,698 cycles # 2.816 GHz + 2,942,277,354 instructions # 1.48 insn per cycle + 0.765598417 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 Avg ME (F77/GPU) = 8.1274562860176604E-006 Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.341006e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.344191e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.344191e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.339413e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.342602e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.342602e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.163389 sec +TOTAL : 0.163415 sec INFO: No Floating Point Exceptions have been reported - 476,471,724 cycles # 2.858 GHz - 1,396,834,040 instructions # 2.93 insn per cycle - 0.167338927 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3991) (avx2: 0) (512y: 0) (512z: 0) + 474,956,853 cycles # 2.847 GHz + 1,396,923,375 instructions # 2.94 insn per cycle + 0.167372542 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3991) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.360618e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.372071e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.372071e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.350685e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
6.362490e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.362490e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.088055 sec +TOTAL : 0.088282 sec INFO: No Floating Point Exceptions have been reported - 246,212,528 cycles # 2.691 GHz - 699,164,769 instructions # 2.84 insn per cycle - 0.092128091 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 9501) (avx2: 0) (512y: 0) (512z: 0) + 246,129,842 cycles # 2.680 GHz + 699,160,574 instructions # 2.84 insn per cycle + 0.092454839 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9501) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.397376e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.402958e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.402958e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.421076e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.426847e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.426847e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.042872 sec +TOTAL : 0.042262 sec INFO: No Floating Point Exceptions have been reported - 121,563,038 cycles # 2.617 GHz - 260,039,900 instructions # 2.14 insn per cycle - 0.046942965 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8227) (512y: 0) (512z: 0) + 120,513,094 cycles # 2.641 GHz + 260,079,134 instructions # 2.16 insn per cycle + 0.046206481 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8227) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.633036e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.640418e+04 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.640418e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.614262e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.622122e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.622122e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037445 sec +TOTAL : 0.037855 sec INFO: No Floating Point Exceptions have been reported - 108,374,861 cycles # 2.658 GHz - 240,137,018 instructions # 2.22 insn per cycle - 0.041385204 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 7348) (512y: 150) (512z: 0) + 109,022,775 cycles # 2.645 GHz + 240,308,972 instructions # 2.20 insn per cycle + 0.041904895 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7348) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.199506e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204802e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.204802e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.170349e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.175260e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.175260e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.049113 sec +TOTAL : 0.050252 sec INFO: No Floating Point Exceptions have been reported - 96,354,524 cycles # 1.837 GHz - 138,443,095 instructions # 1.44 insn per cycle - 0.053009261 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1692) (512y: 126) (512z: 6592) + 96,595,554 cycles # 1.802 GHz + 138,452,128 instructions # 1.43 insn per cycle + 0.054148545 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1692) (512y: 126) (512z: 6592) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index 
a46b9e59f6..ca894b0a6d 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_09:37:23 +DATE: 2024-05-16_15:19:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.243744e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.268803e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.272916e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.237277e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.263102e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.267367e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.468608 sec +TOTAL : 0.467317 sec INFO: No Floating Point Exceptions have been reported - 1,942,160,737 cycles # 2.818 GHz - 2,807,580,866 instructions # 1.45 insn per cycle - 0.748749095 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 1 256 1 + 1,933,877,717 cycles # 2.813 GHz + 2,829,779,417 instructions # 1.46 insn per cycle + 0.746133258 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.939505e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.078673e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.089031e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.945887e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.087010e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.096853e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.486285 sec +TOTAL : 0.483889 sec INFO: No Floating Point Exceptions have been reported - 1,994,044,298 cycles # 2.816 GHz - 2,960,112,232 instructions # 1.48 insn per cycle - 0.766502280 seconds time elapsed + 2,005,783,112 cycles # 2.816 GHz + 2,927,359,248 instructions # 1.46 insn per cycle + 
0.768925329 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 Avg ME (F77/GPU) = 8.1274562860176604E-006 Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.338112e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.341354e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.341354e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.344408e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.347652e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.347652e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.162906 sec +TOTAL : 0.162339 sec INFO: No Floating Point Exceptions have been reported - 472,809,594 cycles # 2.841 GHz - 1,391,971,221 instructions # 2.94 insn per cycle - 0.167023132 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3869) (avx2: 0) (512y: 0) (512z: 0) + 471,806,818 cycles # 2.848 GHz + 1,391,948,601 instructions # 2.95 insn per cycle + 0.166295977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3869) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.300367e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.312721e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.312721e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.367799e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
6.379601e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.379601e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.088323 sec +TOTAL : 0.087176 sec INFO: No Floating Point Exceptions have been reported - 245,693,132 cycles # 2.673 GHz - 695,206,213 instructions # 2.83 insn per cycle - 0.092508234 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 9537) (avx2: 0) (512y: 0) (512z: 0) + 243,999,829 cycles # 2.694 GHz + 695,186,413 instructions # 2.85 insn per cycle + 0.091139423 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9537) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.397203e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.402750e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.402750e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.395387e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.400899e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.400899e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.042230 sec +TOTAL : 0.042187 sec INFO: No Floating Point Exceptions have been reported - 120,165,191 cycles # 2.630 GHz - 255,644,551 instructions # 2.13 insn per cycle - 0.046411879 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8181) (512y: 0) (512z: 0) + 119,801,052 cycles # 2.624 GHz + 255,741,591 instructions # 2.13 insn per cycle + 0.046174431 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8181) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.585636e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.592927e+04 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.592927e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.613988e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.621406e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621406e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037822 sec +TOTAL : 0.037041 sec INFO: No Floating Point Exceptions have been reported - 108,310,856 cycles # 2.609 GHz - 235,872,181 instructions # 2.18 insn per cycle - 0.042148151 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 7301) (512y: 150) (512z: 0) + 106,534,081 cycles # 2.639 GHz + 235,917,118 instructions # 2.21 insn per cycle + 0.041041363 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7301) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.168091e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.173496e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.173496e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.167962e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.172897e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.172897e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.049868 sec +TOTAL : 0.049548 sec INFO: No Floating Point Exceptions have been reported - 95,370,186 cycles # 1.788 GHz - 133,947,611 instructions # 1.40 insn per cycle - 0.053920194 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1641) (512y: 126) (512z: 6597) + 94,554,513 cycles # 1.786 GHz + 133,899,064 instructions # 1.42 insn per cycle + 0.053428613 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1641) (512y: 126) (512z: 6597) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 
be11ef2861..f86e27869e 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_09:37:33 +DATE: 2024-05-16_15:19:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.538093e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.550084e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.553430e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.541598e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.553658e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.556693e+04 ) sec^-1 MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 -TOTAL : 0.468250 sec +TOTAL : 0.467629 sec INFO: No Floating Point Exceptions have been reported - 1,976,256,669 cycles # 2.846 GHz - 2,862,455,030 instructions # 1.45 insn per cycle - 0.751246171 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 1 256 1 + 1,964,166,954 cycles # 2.815 GHz + 2,823,406,286 instructions # 1.44 insn per cycle + 0.754117473 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.618726e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.730175e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.743445e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.614317e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.731134e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.742615e+05 ) sec^-1 MeanMatrixElemValue = ( 8.020493e-03 +- 4.025604e-03 ) GeV^-4 -TOTAL : 0.470462 sec +TOTAL : 0.468434 sec INFO: No Floating Point Exceptions have been reported - 1,945,323,505 cycles # 2.835 GHz - 2,854,909,440 instructions # 1.47 insn per cycle - 0.744046605 seconds time elapsed + 1,946,164,211 cycles # 2.817 GHz + 2,847,399,547 instructions # 1.46 insn per cycle + 
0.748191861 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127250e-06 Avg ME (F77/GPU) = 8.1272870954487585E-006 Relative difference = 4.564329725014175e-06 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.493617e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.497008e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497008e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.448019e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.451516e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.451516e+03 ) sec^-1 MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.156308 sec +TOTAL : 0.158474 sec INFO: No Floating Point Exceptions have been reported - 460,938,113 cycles # 2.887 GHz - 1,393,426,941 instructions # 3.02 insn per cycle - 0.160226520 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3070) (avx2: 0) (512y: 0) (512z: 0) + 461,638,972 cycles # 2.852 GHz + 1,393,493,000 instructions # 3.02 insn per cycle + 0.162490485 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3070) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105211728276E-006 Relative difference = 5.891219330978222e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.209351e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.214237e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.214237e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.201120e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.205395e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.205395e+04 ) sec^-1 MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.048337 sec +TOTAL : 0.048902 sec INFO: No Floating Point Exceptions have been reported - 137,973,328 cycles # 2.667 GHz - 375,722,004 instructions # 2.72 insn per cycle - 0.052233489 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:10134) (avx2: 0) (512y: 0) (512z: 0) + 138,099,810 cycles # 2.644 GHz + 375,723,801 instructions # 2.72 insn per cycle + 0.052805368 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10134) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.738682e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.760820e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.760820e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.699468e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.721720e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.721720e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.024033 sec +TOTAL : 0.024500 sec INFO: No Floating Point Exceptions have been reported - 71,841,958 cycles # 2.624 GHz - 146,656,247 instructions # 2.04 insn per cycle - 0.027862943 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8933) (512y: 0) (512z: 0) + 72,431,086 cycles # 2.595 GHz + 146,734,646 instructions # 2.03 insn per cycle + 0.028413255 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8933) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.147936e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.177477e+04 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.177477e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.950281e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.979563e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.979563e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.021655 sec +TOTAL : 0.023199 sec INFO: No Floating Point Exceptions have been reported - 66,095,413 cycles # 2.653 GHz - 136,520,360 instructions # 2.07 insn per cycle - 0.025536275 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8164) (512y: 28) (512z: 0) + 67,511,576 cycles # 2.517 GHz + 136,466,222 instructions # 2.02 insn per cycle + 0.027372188 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8164) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.342092e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.362212e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.362212e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.260359e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.280493e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.280493e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.027664 sec +TOTAL : 0.028479 sec INFO: No Floating Point Exceptions have been reported - 59,063,912 cycles # 1.907 GHz - 85,174,422 instructions # 1.44 insn per cycle - 0.031611330 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2572) (512y: 32) (512z: 6935) + 59,124,236 cycles # 1.860 GHz + 85,286,285 instructions # 1.44 insn per cycle + 0.032355670 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2572) (512y: 32) (512z: 6935) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 5c9bac71df..2af7dd76f9 
100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_09:37:44 +DATE: 2024-05-16_15:20:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.566284e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.577247e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.580384e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.561126e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.572400e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.575387e+04 ) sec^-1 MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 -TOTAL : 0.467516 sec +TOTAL : 0.469592 sec INFO: No Floating Point Exceptions have been reported - 1,965,266,310 cycles # 2.854 GHz - 2,875,934,182 instructions # 1.46 insn per cycle - 0.745802230 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 1 256 1 + 1,933,901,131 cycles # 2.816 GHz + 2,803,636,036 instructions # 1.45 insn per cycle + 0.744726293 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.913393e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.003584e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005156e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.901730e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.003706e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005157e+06 ) sec^-1 MeanMatrixElemValue = ( 8.020495e-03 +- 4.025606e-03 ) GeV^-4 -TOTAL : 0.469176 sec +TOTAL : 0.471774 sec INFO: No Floating Point Exceptions have been reported - 1,950,555,056 cycles # 2.845 GHz - 2,785,891,797 instructions # 1.43 insn per cycle - 0.743443978 seconds time elapsed + 1,934,886,385 cycles # 2.815 GHz + 2,830,776,229 instructions # 1.46 insn per cycle + 
0.746474254 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127250e-06 Avg ME (F77/GPU) = 8.1272870252982758E-006 Relative difference = 4.555698209723637e-06 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.466728e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.470173e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.470173e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.452227e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.455705e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.455705e+03 ) sec^-1 MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.156828 sec +TOTAL : 0.157329 sec INFO: No Floating Point Exceptions have been reported - 459,335,817 cycles # 2.869 GHz - 1,388,443,506 instructions # 3.02 insn per cycle - 0.160735073 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2959) (avx2: 0) (512y: 0) (512z: 0) + 458,573,657 cycles # 2.854 GHz + 1,388,574,447 instructions # 3.03 insn per cycle + 0.161242660 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105211728276E-006 Relative difference = 5.891219330978222e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.220593e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.225034e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.225034e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.204538e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.208976e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.208976e+04 ) sec^-1 MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.047637 sec +TOTAL : 0.047932 sec INFO: No Floating Point Exceptions have been reported - 136,945,111 cycles # 2.688 GHz - 371,014,842 instructions # 2.71 insn per cycle - 0.051574317 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:10117) (avx2: 0) (512y: 0) (512z: 0) + 136,097,535 cycles # 2.652 GHz + 371,027,952 instructions # 2.73 insn per cycle + 0.051946079 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10117) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.683853e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.705729e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.705729e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.559391e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.580217e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.580217e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.023981 sec +TOTAL : 0.024960 sec INFO: No Floating Point Exceptions have been reported - 70,282,423 cycles # 2.572 GHz - 141,956,175 instructions # 2.02 insn per cycle - 0.027873689 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8887) (512y: 0) (512z: 0) + 71,167,021 cycles # 2.517 GHz + 142,031,155 instructions # 2.00 insn per cycle + 0.028974311 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8887) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.113105e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.146058e+04 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.146058e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.102195e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.131341e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.131341e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.021054 sec +TOTAL : 0.021142 sec INFO: No Floating Point Exceptions have been reported - 63,964,669 cycles # 2.622 GHz - 131,721,900 instructions # 2.06 insn per cycle - 0.024943326 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8117) (512y: 28) (512z: 0) + 63,906,261 cycles # 2.611 GHz + 131,729,034 instructions # 2.06 insn per cycle + 0.025029577 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8117) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.362624e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.384309e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.384309e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.321655e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.342179e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.342179e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.026667 sec +TOTAL : 0.027241 sec INFO: No Floating Point Exceptions have been reported - 57,079,965 cycles # 1.898 GHz - 80,394,091 instructions # 1.41 insn per cycle - 0.030711995 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2521) (512y: 32) (512z: 6939) + 57,621,926 cycles # 1.879 GHz + 80,488,160 instructions # 1.40 insn per cycle + 0.031258526 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2521) (512y: 32) (512z: 6939) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index e734cd7c84..16ac12981a 
100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_09:37:54 +DATE: 2024-05-16_15:20:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.175427e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.197973e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.201962e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.172533e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.195464e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.199217e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.467363 sec +TOTAL : 0.467059 sec INFO: No Floating Point Exceptions have been reported - 1,944,433,279 cycles # 2.832 GHz - 2,794,631,115 instructions # 1.44 insn per cycle - 0.744919527 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 1 256 1 + 1,929,783,722 cycles # 2.812 GHz + 2,830,067,082 instructions # 1.47 insn per cycle + 0.744348567 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.821650e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.958980e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.968240e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.817494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.954472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.963776e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.483179 sec +TOTAL : 0.485210 sec INFO: No Floating Point Exceptions have been reported - 2,015,415,638 cycles # 2.820 GHz - 2,968,884,378 instructions # 1.47 insn per cycle - 0.771332872 seconds time elapsed + 1,989,265,248 cycles # 2.816 GHz + 2,972,405,087 instructions # 1.49 insn per cycle + 
0.764721680 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 Avg ME (F77/GPU) = 8.1274562879405200E-006 Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.315942e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.319079e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.319079e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.312127e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.315249e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.315249e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.164432 sec +TOTAL : 0.164695 sec INFO: No Floating Point Exceptions have been reported - 477,932,083 cycles # 2.852 GHz - 1,405,300,949 instructions # 2.94 insn per cycle - 0.168321285 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3977) (avx2: 0) (512y: 0) (512z: 0) + 479,517,658 cycles # 2.854 GHz + 1,405,303,424 instructions # 2.93 insn per cycle + 0.168655160 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3977) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.596291e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.608589e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.608589e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.589174e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.601629e+03 
) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.601629e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.084924 sec +TOTAL : 0.085009 sec INFO: No Floating Point Exceptions have been reported - 242,704,238 cycles # 2.749 GHz - 690,951,648 instructions # 2.85 insn per cycle - 0.088916082 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 9324) (avx2: 0) (512y: 0) (512z: 0) + 242,672,694 cycles # 2.748 GHz + 691,102,866 instructions # 2.85 insn per cycle + 0.088915527 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9324) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.385512e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.391544e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.391544e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.402863e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.409241e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.409241e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.043079 sec +TOTAL : 0.042757 sec INFO: No Floating Point Exceptions have been reported - 119,944,572 cycles # 2.584 GHz - 257,870,362 instructions # 2.15 insn per cycle - 0.047077076 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8244) (512y: 0) (512z: 0) + 119,836,607 cycles # 2.596 GHz + 257,882,084 instructions # 2.15 insn per cycle + 0.046733316 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.610632e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.619218e+04 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.619218e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.611690e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.620124e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.620124e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037689 sec +TOTAL : 0.037856 sec INFO: No Floating Point Exceptions have been reported - 107,352,604 cycles # 2.621 GHz - 238,160,282 instructions # 2.22 insn per cycle - 0.041537470 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 7342) (512y: 146) (512z: 0) + 108,462,768 cycles # 2.631 GHz + 238,127,423 instructions # 2.20 insn per cycle + 0.041890123 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7342) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.148363e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.153118e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.153118e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.150674e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.155466e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155466e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.051095 sec +TOTAL : 0.051477 sec INFO: No Floating Point Exceptions have been reported - 97,982,591 cycles # 1.800 GHz - 139,316,915 instructions # 1.42 insn per cycle - 0.055005350 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1953) (512y: 122) (512z: 6323) + 99,538,839 cycles # 1.810 GHz + 139,339,349 instructions # 1.40 insn per cycle + 0.055665824 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1953) (512y: 122) (512z: 6323) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 
1d6593cd77..96180e8a09 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -40,190 +40,190 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-15_09:38:04 +DATE: 2024-05-16_15:20:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.209377e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.234207e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.238140e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.207087e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.230616e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.234507e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.464907 sec +TOTAL : 0.468179 sec INFO: No Floating Point Exceptions have been reported - 1,935,276,475 cycles # 2.820 GHz - 2,840,096,326 instructions # 1.47 insn per cycle - 0.743011197 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 1 256 1 + 1,938,727,271 cycles # 2.813 GHz + 2,835,562,501 instructions # 1.46 insn per cycle + 0.747262841 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.925364e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.067051e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.077128e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.924846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.065621e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.075056e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.486874 sec +TOTAL : 0.482793 sec INFO: No Floating Point Exceptions have been reported - 1,995,098,280 cycles # 2.817 GHz - 2,948,453,060 instructions # 1.48 insn per cycle - 0.766894465 seconds time elapsed + 2,011,507,022 cycles # 2.818 GHz + 2,962,288,052 instructions # 1.47 insn per cycle + 
0.770325801 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 Avg ME (F77/GPU) = 8.1274562879405200E-006 Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.322818e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.325985e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.325985e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.325014e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.328184e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328184e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.163700 sec +TOTAL : 0.163699 sec INFO: No Floating Point Exceptions have been reported - 476,501,052 cycles # 2.855 GHz - 1,400,760,181 instructions # 2.94 insn per cycle - 0.167774037 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3871) (avx2: 0) (512y: 0) (512z: 0) + 475,740,171 cycles # 2.851 GHz + 1,400,755,519 instructions # 2.94 insn per cycle + 0.167716370 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3871) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.565327e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.577506e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.577506e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.586616e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.599028e+03 
) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.599028e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.084744 sec +TOTAL : 0.084604 sec INFO: No Floating Point Exceptions have been reported - 241,875,702 cycles # 2.743 GHz - 687,449,657 instructions # 2.84 insn per cycle - 0.088833048 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 9365) (avx2: 0) (512y: 0) (512z: 0) + 242,310,895 cycles # 2.753 GHz + 687,440,781 instructions # 2.84 insn per cycle + 0.088664129 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9365) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.428402e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.434217e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.434217e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.421509e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.427219e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.427219e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.041218 sec +TOTAL : 0.041396 sec INFO: No Floating Point Exceptions have been reported - 117,891,142 cycles # 2.639 GHz - 253,477,844 instructions # 2.15 insn per cycle - 0.045185184 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8196) (512y: 0) (512z: 0) + 117,633,598 cycles # 2.630 GHz + 253,582,281 instructions # 2.16 insn per cycle + 0.045344869 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8196) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.598792e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.606341e+04 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.606341e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.533249e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.540083e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.540083e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037394 sec +TOTAL : 0.038782 sec INFO: No Floating Point Exceptions have been reported - 106,494,211 cycles # 2.610 GHz - 233,756,310 instructions # 2.20 insn per cycle - 0.041406370 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 7292) (512y: 146) (512z: 0) + 106,121,372 cycles # 2.518 GHz + 233,883,831 instructions # 2.20 insn per cycle + 0.042791740 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7292) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.150507e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155231e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.155231e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.148151e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.152898e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152898e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.050250 sec +TOTAL : 0.050273 sec INFO: No Floating Point Exceptions have been reported - 95,987,950 cycles # 1.787 GHz - 134,827,619 instructions # 1.40 insn per cycle - 0.054216055 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1904) (512y: 122) (512z: 6323) + 95,562,086 cycles # 1.781 GHz + 134,760,547 instructions # 1.41 insn per cycle + 0.054201969 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1904) (512y: 122) (512z: 6323) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 4a0cfb79ea..15f8e8659d 
100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_09:36:03 +DATE: 2024-05-16_15:18:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.827605e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.737160e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.400698e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.830621e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.798641e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.407520e+08 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.517447 sec +TOTAL : 0.518896 sec INFO: No Floating Point Exceptions have been reported - 2,142,204,484 cycles # 2.853 GHz - 3,057,175,497 instructions # 1.43 insn per cycle - 0.808706740 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,130,015,467 cycles # 2.824 GHz + 3,049,782,764 instructions # 1.43 insn per cycle + 0.811167083 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 Avg ME (F77/GPU) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/gcheck.exe 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.781022e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.130898e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.130898e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.652167e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.115593e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.115593e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.185103 sec +TOTAL : 1.200987 sec INFO: No Floating Point Exceptions have been reported - 3,455,291,899 cycles # 2.905 GHz - 8,714,409,721 instructions # 2.52 insn per cycle - 1.190503621 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 458) (avx2: 0) (512y: 0) (512z: 0) + 3,451,141,340 cycles # 2.863 GHz + 8,714,346,508 instructions # 2.53 insn per cycle + 1.206502072 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 458) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 
1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.557149e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.023679e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.023679e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.615216e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.136998e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.136998e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.787167 sec +TOTAL : 0.764589 sec INFO: No Floating Point Exceptions have been reported - 2,200,314,542 cycles # 2.778 GHz - 5,465,483,454 instructions # 2.48 insn per cycle - 0.792724066 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1298) (avx2: 0) (512y: 0) (512z: 0) + 2,197,801,743 cycles # 2.856 GHz + 5,465,338,789 instructions # 2.49 insn per cycle + 0.770190206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1298) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.319860e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.471499e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.471499e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.276018e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.408168e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.408168e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.566271 sec +TOTAL : 0.576218 sec INFO: No Floating Point Exceptions have been reported - 1,593,859,432 cycles # 2.791 GHz - 3,181,682,971 instructions # 2.00 insn per cycle - 0.571741699 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1459) (512y: 0) (512z: 0) + 1,593,709,911 cycles # 2.743 GHz + 3,182,241,147 instructions # 2.00 insn per cycle + 0.581747530 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1459) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.403259e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.666143e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.666143e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.349428e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.560869e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560869e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.551298 sec +TOTAL : 0.561533 sec INFO: No Floating Point Exceptions have been reported - 1,554,857,410 cycles # 2.795 GHz - 3,083,603,727 instructions # 1.98 insn per cycle - 0.556804001 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1274) (512y: 95) (512z: 0) + 1,552,006,209 cycles # 2.741 GHz + 3,083,871,547 instructions # 1.99 insn per cycle + 0.567100846 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1274) (512y: 95) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.142477e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.082178e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 3.082178e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.103380e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.012957e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.012957e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.605015 sec +TOTAL : 0.614313 sec INFO: No Floating Point Exceptions have been reported - 1,346,241,870 cycles # 2.207 GHz - 2,376,266,453 instructions # 1.77 insn per cycle - 0.610579465 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 584) (512y: 62) (512z: 953) + 1,344,567,311 cycles # 2.171 GHz + 2,376,857,450 instructions # 1.77 insn per cycle + 0.619905839 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 584) (512y: 62) (512z: 953) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index 6dd9e0ac51..6add239f16 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_09:36:15 +DATE: 2024-05-16_15:18:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.940650e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.318045e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.789707e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.948407e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.328423e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.761410e+08 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.518906 sec +TOTAL : 0.519601 sec INFO: No Floating Point Exceptions have been reported - 2,145,710,295 cycles # 2.853 GHz - 3,048,382,925 instructions # 1.42 insn per cycle - 0.810167134 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,123,926,879 cycles # 2.815 GHz + 2,991,717,095 instructions # 1.41 insn per cycle + 0.811782941 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 Avg ME (F77/GPU) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.840959e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.138397e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.138397e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.686449e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.122021e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.122021e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.178485 sec +TOTAL : 1.196252 sec INFO: No Floating Point Exceptions have been reported - 3,433,874,218 cycles # 2.901 GHz - 8,629,115,026 instructions # 2.51 insn per cycle - 1.184073779 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 403) (avx2: 0) (512y: 0) (512z: 0) + 3,435,810,217 cycles # 2.862 GHz + 8,629,255,980 instructions # 2.51 insn per cycle + 1.201785163 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 403) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe [ 
PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.605196e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.108330e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.108330e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.590372e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.090308e+06 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.090308e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.766522 sec +TOTAL : 0.773787 sec INFO: No Floating Point Exceptions have been reported - 2,179,218,947 cycles # 2.825 GHz - 5,398,904,070 instructions # 2.48 insn per cycle - 0.772033522 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1258) (avx2: 0) (512y: 0) (512z: 0) + 2,172,281,754 cycles # 2.790 GHz + 5,399,686,889 instructions # 2.49 insn per cycle + 0.779398624 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1258) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.311152e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.464438e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.464438e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.283822e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.420214e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.420214e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.567182 sec +TOTAL : 0.573489 sec INFO: No Floating Point Exceptions have been reported - 1,592,119,572 cycles # 2.783 GHz - 3,148,952,578 instructions # 1.98 insn per cycle - 0.572671512 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1386) (512y: 0) (512z: 0) + 1,585,769,603 cycles # 2.741 GHz + 3,149,146,191 instructions # 1.99 insn per cycle + 0.579182812 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1386) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.409665e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.678412e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 3.678412e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.354137e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.604902e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.604902e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.548491 sec +TOTAL : 0.559958 sec INFO: No Floating Point Exceptions have been reported - 1,543,347,926 cycles # 2.789 GHz - 3,062,213,060 instructions # 1.98 insn per cycle - 0.553926856 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1220) (512y: 95) (512z: 0) + 1,547,131,577 cycles # 2.739 GHz + 3,062,437,995 instructions # 1.98 insn per cycle + 0.565482274 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1220) (512y: 95) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.150474e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.084771e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.084771e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.108481e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.023241e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.023241e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.600980 sec +TOTAL : 0.612188 sec INFO: No Floating Point Exceptions have been reported - 1,356,107,276 cycles # 2.238 GHz - 2,361,951,036 instructions # 1.74 insn per cycle - 0.606658090 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 557) (512y: 62) (512z: 944) + 1,354,565,413 cycles # 2.195 GHz + 2,362,076,089 instructions # 1.74 insn per cycle + 0.617754113 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 557) (512y: 62) (512z: 944) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 5f4022aca0..35b822f8f6 100644 --- 
a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_09:36:27 +DATE: 2024-05-16_15:18:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.370405e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.208467e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.261700e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.370205e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.202282e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.219119e+09 ) sec^-1 MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 -TOTAL : 0.479192 sec +TOTAL : 0.481970 sec INFO: No Floating Point Exceptions have been reported - 2,027,387,341 cycles # 2.850 GHz - 2,908,651,127 instructions # 1.43 insn per cycle - 0.768339555 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 + 1,992,725,828 cycles # 2.818 GHz + 2,868,294,521 instructions # 1.44 insn per cycle + 0.764321619 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 72 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.232893e-01 Avg ME (F77/GPU) = 0.42328959883889183 Relative difference = 7.059920764700599e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/gcheck.exe 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.802984e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.139381e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.139381e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.685625e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.126627e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.126627e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.158709 sec +TOTAL : 1.173857 sec INFO: No Floating Point Exceptions have been reported - 3,373,049,721 cycles # 2.900 GHz - 8,663,491,398 instructions # 2.57 insn per cycle - 1.163930937 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 464) (avx2: 0) (512y: 0) (512z: 0) + 3,371,653,633 cycles # 2.862 GHz + 8,663,374,999 instructions # 2.57 insn per cycle + 1.179087797 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 464) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 
/ 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.387191e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.803778e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.803778e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.242831e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.476100e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.476100e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.531005 sec +TOTAL : 0.559869 sec INFO: No Floating Point Exceptions have been reported - 1,545,318,515 cycles # 2.885 GHz - 3,686,876,111 instructions # 2.39 insn per cycle - 0.536250836 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1472) (avx2: 0) (512y: 0) (512z: 0) + 1,544,628,517 cycles # 2.742 GHz + 3,687,558,281 instructions # 2.39 insn per cycle + 0.565253973 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328960439772345 Relative difference = 1.0389396439618597e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.111672e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.628157e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.628157e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.072720e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.536969e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.536969e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.425576 sec +TOTAL : 0.431765 sec INFO: No Floating Point Exceptions have been reported - 1,209,256,462 cycles # 2.808 GHz - 2,425,469,042 instructions # 2.01 insn per cycle - 0.431202452 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1835) (512y: 0) (512z: 0) + 1,203,780,059 cycles # 2.758 GHz + 2,425,738,448 instructions # 2.02 insn per cycle + 0.436956710 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1835) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.178015e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.875142e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.875142e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.171115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.846212e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.846212e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.420058 sec +TOTAL : 0.420853 sec INFO: No Floating Point Exceptions have been reported - 1,188,994,374 cycles # 2.799 GHz - 2,372,253,902 instructions # 2.00 insn per cycle - 0.425516144 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1716) (512y: 2) (512z: 0) + 1,176,016,394 cycles # 2.764 GHz + 2,371,904,468 instructions # 2.02 insn per cycle + 0.426173333 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1716) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.937549e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.030922e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 5.030922e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.877260e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.908000e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.908000e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.449108 sec +TOTAL : 0.456855 sec INFO: No Floating Point Exceptions have been reported - 1,057,896,924 cycles # 2.331 GHz - 2,045,513,672 instructions # 1.93 insn per cycle - 0.454389124 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1125) (512y: 5) (512z: 1216) + 1,057,659,631 cycles # 2.291 GHz + 2,045,594,279 instructions # 1.93 insn per cycle + 0.462305299 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1125) (512y: 5) (512z: 1216) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328957567224279 Relative difference = 5.7473080363015266e-08 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 3d35345a2a..7aff49b16c 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_09:36:38 +DATE: 2024-05-16_15:18:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.381885e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.216842e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.268913e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.371360e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.210950e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.256375e+09 ) sec^-1 MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 -TOTAL : 0.478997 sec +TOTAL : 0.480672 sec INFO: No Floating Point Exceptions have been reported - 2,030,799,401 cycles # 2.843 GHz - 2,900,571,681 instructions # 1.43 insn per cycle - 0.770784907 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 + 1,992,055,315 cycles # 2.814 GHz + 2,833,598,547 instructions # 1.42 insn per cycle + 0.764848194 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 71 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.232893e-01 Avg ME (F77/GPU) = 0.42328960436861962 Relative difference = 7.190557844040413e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.919524e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156224e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.156224e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.763702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.137508e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.137508e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.146189 sec +TOTAL : 1.163446 sec INFO: No Floating Point Exceptions have been reported - 3,340,681,084 cycles # 2.903 GHz - 8,537,405,679 instructions # 2.56 insn per cycle - 1.151410707 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 372) (avx2: 0) (512y: 0) (512z: 0) + 3,338,476,373 cycles # 2.858 GHz + 8,537,550,948 instructions # 2.56 insn per cycle + 1.168736395 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 372) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe [ 
PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.328612e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.639360e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.639360e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.260122e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.497908e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.497908e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.540599 sec +TOTAL : 0.555036 sec INFO: No Floating Point Exceptions have been reported - 1,541,725,161 cycles # 2.828 GHz - 3,654,878,901 instructions # 2.37 insn per cycle - 0.545806836 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1417) (avx2: 0) (512y: 0) (512z: 0) + 1,536,047,057 cycles # 2.745 GHz + 3,655,155,421 instructions # 2.38 insn per cycle + 0.560267212 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1417) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328960439772345 Relative difference = 1.0389396439618597e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.095733e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.576377e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.576377e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.063874e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.501699e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.501699e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.428129 sec +TOTAL : 0.432903 sec INFO: No Floating Point Exceptions have been reported - 1,212,577,396 cycles # 2.801 GHz - 2,409,931,564 instructions # 1.99 insn per cycle - 0.433541587 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1739) (512y: 0) (512z: 0) + 1,210,141,290 cycles # 2.765 GHz + 2,409,755,736 instructions # 1.99 insn per cycle + 0.438252635 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1739) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.197829e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.948312e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 5.948312e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.166764e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.861571e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.861571e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.416935 sec +TOTAL : 0.420903 sec INFO: No Floating Point Exceptions have been reported - 1,184,755,346 cycles # 2.810 GHz - 2,360,000,457 instructions # 1.99 insn per cycle - 0.422233667 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1639) (512y: 2) (512z: 0) + 1,178,969,939 cycles # 2.770 GHz + 2,360,225,770 instructions # 2.00 insn per cycle + 0.426183474 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1639) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.948970e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.098496e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.098496e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.911284e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.009343e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.009343e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.445077 sec +TOTAL : 0.450059 sec INFO: No Floating Point Exceptions have been reported - 1,053,296,798 cycles # 2.342 GHz - 2,030,151,201 instructions # 1.93 insn per cycle - 0.450399180 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1038) (512y: 5) (512z: 1206) + 1,050,992,336 cycles # 2.312 GHz + 2,030,439,704 instructions # 1.93 insn per cycle + 0.455402836 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1038) (512y: 5) (512z: 1206) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328957567224279 Relative difference = 5.7473080363015266e-08 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index df4c59c2d7..abe970d6c3 100644 --- 
a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_09:36:49 +DATE: 2024-05-16_15:19:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.836360e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.773080e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.372620e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.820532e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.774843e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.362520e+08 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.521460 sec +TOTAL : 0.522135 sec INFO: No Floating Point Exceptions have been reported - 2,141,147,741 cycles # 2.839 GHz - 3,055,126,214 instructions # 1.43 insn per cycle - 0.813180869 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,125,526,304 cycles # 2.816 GHz + 3,031,609,259 instructions # 1.43 insn per cycle + 0.813775431 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 Avg ME (F77/GPU) = 0.42328961420809225 Relative difference = 2.02678940084305e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/gcheck.exe 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.535385e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.099052e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.099052e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.477506e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.093135e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.093135e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.214378 sec +TOTAL : 1.221347 sec INFO: No Floating Point Exceptions have been reported - 3,512,565,133 cycles # 2.883 GHz - 8,782,234,865 instructions # 2.50 insn per cycle - 1.219810796 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) + 3,505,104,547 cycles # 2.859 GHz + 8,781,502,817 instructions # 2.51 insn per cycle + 1.226777715 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 
1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.634092e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.156666e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.156666e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.650256e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.201424e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.201424e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.755579 sec +TOTAL : 0.750812 sec INFO: No Floating Point Exceptions have been reported - 2,158,444,599 cycles # 2.839 GHz - 5,462,042,613 instructions # 2.53 insn per cycle - 0.761050701 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1315) (avx2: 0) (512y: 0) (512z: 0) + 2,158,593,065 cycles # 2.858 GHz + 5,461,970,761 instructions # 2.53 insn per cycle + 0.756427517 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1315) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.351677e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.533393e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.533393e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.173052e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.222124e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.222124e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.560475 sec +TOTAL : 0.600946 sec INFO: No Floating Point Exceptions have been reported - 1,574,362,055 cycles # 2.784 GHz - 3,128,678,680 instructions # 1.99 insn per cycle - 0.566031332 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1508) (512y: 0) (512z: 0) + 1,584,857,703 cycles # 2.630 GHz + 3,130,453,718 instructions # 1.98 insn per cycle + 0.606559761 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1508) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.484188e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.839921e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.839921e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.444228e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.788523e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.788523e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.536080 sec +TOTAL : 0.544040 sec INFO: No Floating Point Exceptions have been reported - 1,508,778,976 cycles # 2.790 GHz - 2,980,181,539 instructions # 1.98 insn per cycle - 0.541495684 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1266) (512y: 104) (512z: 0) + 1,507,653,377 cycles # 2.746 GHz + 2,979,978,086 instructions # 1.98 insn per cycle + 0.549733637 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1266) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.192468e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.184479e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 3.184479e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.159766e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.131056e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.131056e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.593157 sec +TOTAL : 0.601738 sec INFO: No Floating Point Exceptions have been reported - 1,325,099,887 cycles # 2.215 GHz - 2,317,476,168 instructions # 1.75 insn per cycle - 0.598857501 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 708) (512y: 64) (512z: 1000) + 1,324,343,740 cycles # 2.183 GHz + 2,317,585,809 instructions # 1.75 insn per cycle + 0.607328338 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 708) (512y: 64) (512z: 1000) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index f455f2bbec..91c7a883f0 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-15_09:37:01 +DATE: 2024-05-16_15:19:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.920881e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.269245e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.735869e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.922874e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.310136e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.745093e+08 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.518131 sec +TOTAL : 0.518290 sec INFO: No Floating Point Exceptions have been reported - 2,139,648,032 cycles # 2.844 GHz - 3,068,084,951 instructions # 1.43 insn per cycle - 0.808990315 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,124,893,311 cycles # 2.820 GHz + 3,045,592,907 instructions # 1.43 insn per cycle + 0.810370808 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 Avg ME (F77/GPU) = 0.42328961420809225 Relative difference = 2.02678940084305e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.651516e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.113057e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.113057e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.542081e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.100861e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100861e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.199334 sec +TOTAL : 1.212162 sec INFO: No Floating Point Exceptions have been reported - 3,485,486,663 cycles # 2.895 GHz - 8,692,192,022 instructions # 2.49 insn per cycle - 1.204801600 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) + 3,479,876,909 cycles # 2.860 GHz + 8,693,142,752 instructions # 2.50 insn per cycle + 1.217788949 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe [ 
PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.605320e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.116395e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.116395e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.583309e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.076893e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.076893e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.766826 sec +TOTAL : 0.776846 sec INFO: No Floating Point Exceptions have been reported - 2,175,433,735 cycles # 2.822 GHz - 5,397,175,628 instructions # 2.48 insn per cycle - 0.772462095 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1286) (avx2: 0) (512y: 0) (512z: 0) + 2,167,338,088 cycles # 2.773 GHz + 5,396,551,029 instructions # 2.49 insn per cycle + 0.782321373 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1286) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.361713e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.582957e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.582957e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.326845e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.550286e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.550286e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.558306 sec +TOTAL : 0.565802 sec INFO: No Floating Point Exceptions have been reported - 1,566,391,037 cycles # 2.781 GHz - 3,096,526,527 instructions # 1.98 insn per cycle - 0.564013625 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1403) (512y: 0) (512z: 0) + 1,565,712,129 cycles # 2.743 GHz + 3,096,211,416 instructions # 1.98 insn per cycle + 0.571442008 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1403) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.482537e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.845149e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 3.845149e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.453432e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.812851e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.812851e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.536318 sec +TOTAL : 0.541762 sec INFO: No Floating Point Exceptions have been reported - 1,509,105,702 cycles # 2.789 GHz - 2,962,529,864 instructions # 1.96 insn per cycle - 0.541724501 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1207) (512y: 104) (512z: 0) + 1,501,240,710 cycles # 2.746 GHz + 2,962,583,104 instructions # 1.97 insn per cycle + 0.547343450 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1207) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.120551e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.050745e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.050745e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.179755e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.168512e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.168512e+06 ) sec^-1 MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.608424 sec +TOTAL : 0.595795 sec INFO: No Floating Point Exceptions have been reported - 1,350,605,417 cycles # 2.202 GHz - 2,301,674,581 instructions # 1.70 insn per cycle - 0.613832458 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 669) (512y: 64) (512z: 987) + 1,328,066,698 cycles # 2.210 GHz + 2,301,968,914 instructions # 1.73 insn per cycle + 0.601517736 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 669) (512y: 64) (512z: 987) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 5113c2293b..685cbca5b9 100644 --- 
a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:33:48 +DATE: 2024-05-16_15:16:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.741611e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169597e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279193e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.742150e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168430e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277843e+08 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.530929 sec +TOTAL : 0.532609 sec INFO: No Floating Point Exceptions have been reported - 2,195,068,626 cycles # 2.840 GHz - 3,156,047,151 instructions # 1.44 insn per cycle - 0.830513630 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,187,320,510 cycles # 2.847 GHz + 3,138,661,758 instructions # 1.43 insn per cycle + 0.825533767 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 Avg ME (F77/GPU) = 3.2340795799595186 Relative difference = 1.2987943449389332e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/gcheck.exe 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.039441e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.099419e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.099419e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.052254e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.112326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.112326e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.250958 sec +TOTAL : 5.217611 sec INFO: No Floating Point Exceptions have been reported - 15,179,924,789 cycles # 2.888 GHz - 38,380,026,423 instructions # 2.53 insn per cycle - 5.256350609 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) + 15,171,088,318 cycles # 2.905 GHz + 38,379,828,637 instructions # 2.53 insn per cycle + 5.223033411 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593964 Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
3.475229e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.666711e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.666711e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.483453e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.675957e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.675957e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 3.127283 sec +TOTAL : 3.119586 sec INFO: No Floating Point Exceptions have been reported - 9,015,042,077 cycles # 2.879 GHz - 24,584,647,400 instructions # 2.73 insn per cycle - 3.132785227 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) + 9,050,575,942 cycles # 2.897 GHz + 24,585,418,505 instructions # 2.72 insn per cycle + 3.125051862 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593955 Relative difference = 1.2987947253027805e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.420774e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.902872e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.902872e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.531605e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.007383e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.007383e+05 
) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.045849 sec +TOTAL : 2.004395 sec INFO: No Floating Point Exceptions have been reported - 5,486,128,241 cycles # 2.677 GHz - 11,257,791,321 instructions # 2.05 insn per cycle - 2.051495208 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2379) (512y: 0) (512z: 0) + 5,470,487,475 cycles # 2.723 GHz + 11,258,117,341 instructions # 2.06 insn per cycle + 2.009874159 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2379) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.115637e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.706987e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.706987e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.034312e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.611178e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.611178e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 1.823473 sec +TOTAL : 1.846817 sec INFO: No Floating Point Exceptions have been reported - 4,932,683,503 cycles # 2.698 GHz - 10,564,312,534 instructions # 2.14 insn per cycle - 1.829070422 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) + 4,937,000,755 cycles # 2.666 GHz + 10,562,656,233 instructions # 2.14 insn per cycle + 1.852346867 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.642478e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.841447e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.841447e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.686069e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.892849e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.892849e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.987876 sec +TOTAL : 2.955560 sec INFO: No Floating Point Exceptions have been reported - 5,393,291,420 cycles # 1.802 GHz - 7,798,978,125 instructions # 1.45 insn per cycle - 2.993405603 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1545) + 5,363,967,162 cycles # 1.812 GHz + 7,798,816,647 instructions # 1.45 insn per cycle + 2.961128813 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1545) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 2da52e42cf..e33bd01ef0 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ 
b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:34:12 +DATE: 2024-05-16_15:16:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.746278e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169345e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279692e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.734270e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.167895e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277771e+08 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.526969 sec +TOTAL : 0.531030 sec INFO: No Floating Point Exceptions have been reported - 2,175,481,601 cycles # 2.841 GHz - 3,133,236,154 instructions # 1.44 insn per cycle - 0.822493478 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,147,766,041 cycles # 2.808 GHz + 3,081,960,346 
instructions # 1.43 insn per cycle + 0.823573588 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 Avg ME (F77/GPU) = 3.2340795799595186 Relative difference = 1.2987943449389332e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.073853e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.135304e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.135304e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.072347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.133952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.133952e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.163747 sec +TOTAL : 5.167480 sec INFO: No Floating Point Exceptions have been reported - 15,015,477,641 cycles # 2.905 GHz - 40,101,004,073 instructions # 2.67 insn per cycle - 5.169229329 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 15,011,121,904 cycles # 2.902 GHz + 40,101,107,795 instructions # 2.67 insn per cycle + 5.172969591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 
tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593964 Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.645087e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.854605e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.854605e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.643871e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.853935e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.853935e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.984803 sec +TOTAL : 2.986462 sec INFO: No Floating Point Exceptions have been reported - 8,681,133,196 cycles # 2.904 GHz - 23,670,240,335 instructions # 2.73 insn per cycle - 2.990217540 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2071) (avx2: 0) (512y: 0) (512z: 0) + 8,687,902,361 cycles # 2.905 GHz + 23,671,582,038 instructions # 2.72 insn per cycle + 2.991891761 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2071) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593955 Relative difference = 1.2987947253027805e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.949358e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.327629e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.327629e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.688647e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.031946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.031946e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.227194 sec +TOTAL : 2.347118 sec INFO: No Floating Point Exceptions have been reported - 6,083,454,450 cycles # 2.726 GHz - 13,060,354,927 instructions # 2.15 insn per cycle - 2.232659491 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2545) (512y: 0) (512z: 0) + 6,408,205,490 cycles # 2.726 GHz + 13,061,009,362 instructions # 2.04 insn per cycle + 2.352705794 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2545) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.235300e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.655532e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.655532e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.217515e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.639971e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
5.639971e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.110790 sec +TOTAL : 2.116902 sec INFO: No Floating Point Exceptions have been reported - 5,768,971,479 cycles # 2.727 GHz - 12,319,476,206 instructions # 2.14 insn per cycle - 2.116330183 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2092) (512y: 294) (512z: 0) + 5,786,103,959 cycles # 2.728 GHz + 12,322,398,791 instructions # 2.13 insn per cycle + 2.122365893 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2092) (512y: 294) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.372736e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.543941e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.543941e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.391355e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.565589e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.565589e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 3.218112 sec +TOTAL : 3.201193 sec INFO: No Floating Point Exceptions have been reported - 5,823,139,493 cycles # 1.807 GHz - 9,601,917,078 instructions # 1.65 insn per cycle - 3.223679661 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1970) + 5,819,258,849 cycles # 1.816 GHz + 9,603,315,511 instructions # 1.65 insn per cycle + 3.206783116 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1970) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 1af837234a..fa2404eda0 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:34:36 +DATE: 2024-05-16_15:16:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.812221e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.671322e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.977032e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.806467e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.679043e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.988694e+08 ) sec^-1 MeanMatrixElemValue = ( 3.294909e+00 +- 3.228140e-03 ) GeV^0 -TOTAL : 0.485928 sec +TOTAL : 0.484472 sec INFO: No Floating Point Exceptions have been reported - 2,016,146,435 cycles # 2.835 GHz - 2,919,084,596 instructions # 1.45 insn per cycle - 0.768276318 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,024,107,607 cycles # 2.847 GHz + 2,925,717,340 instructions # 1.45 insn per cycle + 0.767822860 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 
==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 3.234085e+00 Avg ME (F77/GPU) = 3.2341253389604390 Relative difference = 1.2473067479392238e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.194407e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.265388e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.265388e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.190102e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.263149e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.263149e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 -TOTAL : 4.865848 sec +TOTAL : 4.875075 sec INFO: No Floating Point Exceptions have been reported - 14,143,024,804 cycles # 2.904 GHz - 38,346,436,959 instructions # 2.71 insn per cycle - 4.871040899 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 596) (avx2: 0) (512y: 0) (512z: 0) + 14,157,231,167 cycles # 2.902 GHz + 38,349,372,496 instructions # 2.71 insn per cycle + 4.880360280 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 596) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 Avg ME (F77/C++) = 3.2340941932052374 Relative difference = 5.974014286114415e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.902133e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.301943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.301943e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.893708e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.295163e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.295163e+05 
) sec^-1 MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 -TOTAL : 2.227789 sec +TOTAL : 2.231375 sec INFO: No Floating Point Exceptions have been reported - 6,482,988,779 cycles # 2.904 GHz - 15,822,130,701 instructions # 2.44 insn per cycle - 2.233037883 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2693) (avx2: 0) (512y: 0) (512z: 0) + 6,474,839,888 cycles # 2.896 GHz + 15,821,273,128 instructions # 2.44 insn per cycle + 2.236825857 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2693) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234093e+00 Avg ME (F77/C++) = 3.2340934062376618 Relative difference = 1.2561100182708985e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.940581e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.026038e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.026038e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.952001e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.027533e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.027533e+06 ) sec^-1 MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.260538 sec +TOTAL : 1.258720 sec INFO: No Floating Point Exceptions have been reported - 3,456,681,256 cycles # 2.732 GHz - 7,598,840,761 instructions # 2.20 insn per cycle - 1.265835692 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3054) (512y: 0) (512z: 0) + 3,454,982,692 cycles # 2.735 GHz + 7,599,041,128 instructions # 2.20 insn per cycle + 1.263980564 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3054) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919882990420 Relative difference = 3.6180040581126224e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.403615e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.088127e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.088127e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.592851e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.112843e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.112843e+06 ) sec^-1 MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.203220 sec +TOTAL : 1.180051 sec INFO: No Floating Point Exceptions have been reported - 3,237,988,506 cycles # 2.681 GHz - 7,206,923,956 instructions # 2.23 insn per cycle - 1.208534996 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2854) (512y: 23) (512z: 0) + 3,244,154,820 cycles # 2.739 GHz + 7,208,080,032 instructions # 2.22 insn per cycle + 1.185371954 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2854) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919882990420 Relative difference = 3.6180040581126224e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.777065e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.501185e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.501185e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.861599e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.601056e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.601056e+05 ) sec^-1 MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.635695 sec +TOTAL : 1.616099 sec INFO: No Floating Point Exceptions have been reported - 3,058,049,214 cycles # 1.865 GHz - 5,840,333,775 instructions # 1.91 insn per cycle - 1.641002915 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2375) (512y: 24) (512z: 1889) + 3,061,871,050 cycles # 1.890 GHz + 5,840,738,200 instructions # 1.91 insn per cycle + 1.621459577 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2375) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340921289287508 Relative difference = 3.986551736519174e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 71e6b0ffef..17580b0829 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:34:55 +DATE: 2024-05-16_15:17:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.858638e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.727382e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.049746e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.907160e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.728602e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.048441e+08 ) sec^-1 MeanMatrixElemValue = ( 3.294909e+00 +- 3.228140e-03 ) GeV^0 -TOTAL : 0.486234 sec +TOTAL : 0.485743 sec INFO: No Floating Point Exceptions have been reported - 2,023,449,070 cycles # 2.846 GHz - 2,873,402,373 instructions # 1.42 insn per cycle - 0.769379888 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,023,423,533 cycles # 2.849 GHz + 2,905,255,031 instructions # 1.44 insn per cycle + 0.768600730 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 
==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 3.234085e+00 Avg ME (F77/GPU) = 3.2341253389604390 Relative difference = 1.2473067479392238e-05 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.166228e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.235687e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.235687e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.168782e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.238544e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.238544e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 -TOTAL : 4.927743 sec +TOTAL : 4.921731 sec INFO: No Floating Point Exceptions have been reported - 14,309,020,603 cycles # 2.901 GHz - 39,834,185,016 instructions # 2.78 insn per cycle - 4.932988384 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) + 14,314,886,956 cycles # 2.906 GHz + 39,834,092,366 instructions # 2.78 insn per cycle + 4.927032591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 Avg ME (F77/C++) = 3.2340941675938666 Relative difference = 5.182096339328524e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.724901e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.279125e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.279125e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.713515e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.269520e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.269520e+05 
) sec^-1 MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 -TOTAL : 1.920068 sec +TOTAL : 1.922771 sec INFO: No Floating Point Exceptions have been reported - 5,587,136,910 cycles # 2.903 GHz - 15,286,018,089 instructions # 2.74 insn per cycle - 1.925405428 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) + 5,581,497,918 cycles # 2.896 GHz + 15,286,085,618 instructions # 2.74 insn per cycle + 1.928038449 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234093e+00 Avg ME (F77/C++) = 3.2340934062376618 Relative difference = 1.2561100182708985e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.329531e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.968208e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.968208e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.348339e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.987488e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.987488e+05 ) sec^-1 MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.743816 sec +TOTAL : 1.738529 sec INFO: No Floating Point Exceptions have been reported - 4,752,684,066 cycles # 2.718 GHz - 9,734,599,386 instructions # 2.05 insn per cycle - 1.749311383 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3707) (512y: 0) (512z: 0) + 4,748,584,350 cycles # 2.724 GHz + 9,734,762,909 instructions # 2.05 insn per cycle + 1.743720825 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3707) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919817797840 Relative difference = 5.633796441974414e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.472652e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.141162e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.141162e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.524514e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.201131e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.201131e+05 
) sec^-1 MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.707763 sec +TOTAL : 1.693263 sec INFO: No Floating Point Exceptions have been reported - 4,621,308,695 cycles # 2.699 GHz - 9,326,787,425 instructions # 2.02 insn per cycle - 1.713040106 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3495) (512y: 0) (512z: 0) + 4,630,030,488 cycles # 2.727 GHz + 9,326,323,775 instructions # 2.01 insn per cycle + 1.698452247 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3495) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919817797840 Relative difference = 5.633796441974414e-09 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.532423e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.010355e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.010355e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.566237e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.043529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.043529e+05 ) sec^-1 MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.982779 sec +TOTAL : 1.970968 sec INFO: No Floating Point Exceptions have been reported - 3,653,134,219 cycles # 1.838 GHz - 7,034,588,180 instructions # 1.93 insn per cycle - 1.987995749 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2608) (512y: 12) (512z: 2220) + 3,659,262,236 cycles # 1.853 GHz + 7,035,706,161 instructions # 1.92 insn per cycle + 1.976219857 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2608) (512y: 12) (512z: 2220) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340921270661056 Relative difference = 3.928957668408837e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 32f5af3903..b504154b8b 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:35:16 +DATE: 2024-05-16_15:17:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.732363e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.167582e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.277301e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.734753e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166290e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275672e+08 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.528085 sec +TOTAL : 0.527580 sec INFO: No Floating Point Exceptions have been reported - 2,179,676,411 cycles # 2.847 GHz - 3,146,231,581 instructions # 1.44 insn per cycle - 0.822131932 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe -p 2048 256 1 + 2,184,025,819 cycles # 2.852 GHz + 3,120,664,968 instructions # 1.43 insn per cycle + 0.822365132 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 
==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 Avg ME (F77/GPU) = 3.2340795839181666 Relative difference = 1.2865539301192385e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.042066e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.101336e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.101336e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.032702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.091464e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.091464e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.243500 sec +TOTAL : 5.267767 sec INFO: No Floating Point Exceptions have been reported - 15,259,290,721 cycles # 2.908 GHz - 38,581,382,277 instructions # 2.53 insn per cycle - 5.248883068 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 677) (avx2: 0) (512y: 0) (512z: 0) + 15,275,610,730 cycles # 2.898 GHz + 38,585,204,587 instructions # 2.53 insn per cycle + 5.273127531 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 677) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.483834e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.674993e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.674993e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.478780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.672331e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.672331e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 3.119385 sec +TOTAL : 3.124457 sec INFO: No Floating Point Exceptions have been reported - 8,939,775,457 cycles # 2.862 GHz - 24,230,284,808 instructions # 2.71 insn per cycle - 3.124806580 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) + 8,951,368,692 cycles # 2.862 GHz + 24,230,346,765 instructions # 2.71 insn per cycle + 3.129932357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.594731e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.084904e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.084904e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.646169e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.144963e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.144963e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 1.983472 sec +TOTAL : 1.966588 sec INFO: No Floating Point Exceptions have been reported - 5,389,142,139 cycles # 2.710 GHz - 11,283,335,312 instructions # 2.09 insn per cycle - 1.988987810 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2483) (512y: 0) (512z: 0) + 5,394,193,630 cycles # 2.737 GHz + 11,282,079,100 instructions # 2.09 insn per cycle + 1.972075346 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2483) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.299283e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.923370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.923370e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.312770e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.933844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.933844e+05 
) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 1.772999 sec +TOTAL : 1.769300 sec INFO: No Floating Point Exceptions have been reported - 4,852,815,875 cycles # 2.731 GHz - 10,532,942,750 instructions # 2.17 insn per cycle - 1.778490907 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2170) (512y: 148) (512z: 0) + 4,855,634,573 cycles # 2.737 GHz + 10,529,908,188 instructions # 2.17 insn per cycle + 1.774939787 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2170) (512y: 148) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.819775e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.040098e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.040098e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.779051e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.993953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.993953e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.854977 sec +TOTAL : 2.883837 sec INFO: No Floating Point Exceptions have been reported - 5,210,701,654 cycles # 1.823 GHz - 7,608,646,328 instructions # 1.46 insn per cycle - 2.860486302 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1611) + 5,232,692,174 cycles # 1.812 GHz + 7,609,089,901 instructions # 1.45 insn per cycle + 2.889504238 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1611) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index 0fb04f2139..62b069d661 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -40,175 +40,175 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-15_09:35:39 +DATE: 2024-05-16_15:17:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.742163e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168613e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.278429e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.743856e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168884e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279553e+08 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.529609 sec +TOTAL : 0.531580 sec INFO: No Floating Point Exceptions have been reported - 2,201,621,718 cycles # 2.841 GHz - 3,141,769,862 instructions # 1.43 insn per cycle - 0.833943270 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe -p 2048 256 1 + 2,155,818,187 cycles # 2.818 GHz + 3,085,690,683 instructions # 1.43 insn per cycle + 0.823819066 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 
==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fgcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 Avg ME (F77/GPU) = 3.2340795839181666 Relative difference = 1.2865539301192385e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/gcheck.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.029495e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.088187e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.088187e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.002464e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.060011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.060011e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.273888 sec +TOTAL : 5.344548 sec INFO: No Floating Point Exceptions have been reported - 15,342,852,271 cycles # 2.907 GHz - 40,369,748,050 instructions # 2.63 insn per cycle - 5.279165532 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 15,331,700,326 cycles # 2.866 GHz + 40,369,778,421 instructions # 2.63 insn per cycle + 5.350011304 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.708852e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.925232e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.925232e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.555017e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.755921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.755921e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.936198 sec +TOTAL : 3.059082 sec INFO: No Floating Point Exceptions have been reported - 8,518,728,881 cycles # 2.897 GHz - 23,252,737,328 instructions # 2.73 insn per cycle - 2.941680463 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2090) (avx2: 0) (512y: 0) (512z: 0) + 8,522,277,742 cycles # 2.782 GHz + 23,253,428,254 instructions # 2.73 insn per cycle + 3.064709896 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2090) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.799915e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.157429e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.157429e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.699594e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.044812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.044812e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.293101 sec +TOTAL : 2.340476 sec INFO: No Floating Point Exceptions have been reported - 6,243,757,154 cycles # 2.717 GHz - 12,962,082,336 instructions # 2.08 insn per cycle - 2.298726731 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2668) (512y: 0) (512z: 0) + 6,239,696,903 cycles # 2.661 GHz + 12,963,096,678 instructions # 2.08 insn per cycle + 2.346005075 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2668) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.081644e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.484011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.484011e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.032659e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.430530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.430530e+05 
) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.171117 sec +TOTAL : 2.191599 sec INFO: No Floating Point Exceptions have been reported - 5,931,060,022 cycles # 2.726 GHz - 12,239,685,205 instructions # 2.06 insn per cycle - 2.176662808 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2208) (512y: 296) (512z: 0) + 5,901,015,524 cycles # 2.687 GHz + 12,238,387,260 instructions # 2.07 insn per cycle + 2.197121947 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2208) (512y: 296) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= 
+========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.503773e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.687107e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.687107e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.554826e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.745267e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.745267e+05 ) sec^-1 MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 3.101704 sec +TOTAL : 3.058656 sec INFO: No Floating Point Exceptions have been reported - 5,594,172,202 cycles # 1.801 GHz - 8,744,859,098 instructions # 1.56 insn per cycle - 3.107224592 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1908) + 5,596,491,041 cycles # 1.827 GHz + 8,743,545,379 instructions # 1.56 insn per cycle + 3.064278596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1908) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 From 7112ba6caa755c64c323f321f899b0d8c2b47c21 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 16 May 2024 17:12:38 +0300 Subject: [PATCH 40/48] [jtmk2] rerun 102 tput tests on itgold91, all ok STARTED AT Thu May 16 02:40:43 PM CEST 2024 ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean ENDED(1) AT Thu May 16 03:01:59 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) AT Thu May 16 03:10:45 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Thu May 16 03:14:38 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Thu May 16 03:16:01 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ENDED(5) AT Thu May 16 03:16:14 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common ENDED(6) AT Thu May 16 03:17:37 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -mix -hrd -makej -susyggtt -susyggt1t1 -smeftggtttt -heftggbb -makeclean ENDED(7) AT Thu May 16 03:28:16 PM CEST 2024 [Status=0] --- 
.../log_eemumu_mad_d_inl0_hrd0.txt | 217 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 225 +++++++--------- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 197 ++++++-------- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 202 ++++---------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 219 +++++++-------- .../log_eemumu_mad_d_inl0_hrd1.txt | 217 +++++++-------- .../log_eemumu_mad_d_inl1_hrd0.txt | 217 +++++++-------- .../log_eemumu_mad_d_inl1_hrd1.txt | 215 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0.txt | 221 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 229 +++++++--------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 201 ++++++-------- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 206 +++++--------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 223 +++++++--------- .../log_eemumu_mad_f_inl0_hrd1.txt | 221 +++++++-------- .../log_eemumu_mad_f_inl1_hrd0.txt | 215 +++++++-------- .../log_eemumu_mad_f_inl1_hrd1.txt | 215 +++++++-------- .../log_eemumu_mad_m_inl0_hrd0.txt | 221 +++++++-------- .../log_eemumu_mad_m_inl0_hrd1.txt | 223 +++++++--------- .../log_ggtt_mad_d_inl0_hrd0.txt | 221 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 229 +++++++--------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 201 ++++++-------- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 206 +++++--------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 223 +++++++--------- .../log_ggtt_mad_d_inl0_hrd1.txt | 219 +++++++-------- .../log_ggtt_mad_d_inl1_hrd0.txt | 215 +++++++-------- .../log_ggtt_mad_d_inl1_hrd1.txt | 215 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0.txt | 221 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 229 +++++++--------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 201 ++++++-------- .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 206 +++++--------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 223 +++++++--------- .../log_ggtt_mad_f_inl0_hrd1.txt | 219 +++++++-------- .../log_ggtt_mad_f_inl1_hrd0.txt | 215 +++++++-------- .../log_ggtt_mad_f_inl1_hrd1.txt | 
215 +++++++-------- .../log_ggtt_mad_m_inl0_hrd0.txt | 221 +++++++-------- .../log_ggtt_mad_m_inl0_hrd1.txt | 221 +++++++-------- .../log_ggttg_mad_d_inl0_hrd0.txt | 236 +++++++--------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 248 +++++++---------- .../log_ggttg_mad_d_inl0_hrd1.txt | 236 +++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 236 +++++++--------- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 248 +++++++---------- .../log_ggttg_mad_f_inl0_hrd1.txt | 236 +++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 236 +++++++--------- .../log_ggttg_mad_m_inl0_hrd1.txt | 238 +++++++---------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 236 +++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 248 +++++++---------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 216 ++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 221 +++++---------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 239 +++++++---------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 236 +++++++--------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 232 +++++++--------- .../log_ggttgg_mad_d_inl1_hrd1.txt | 232 +++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 236 +++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 248 +++++++---------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 216 ++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 221 +++++---------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 239 +++++++---------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 242 +++++++---------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 236 +++++++--------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 236 +++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 236 +++++++--------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 236 +++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 236 +++++++--------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 248 +++++++---------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 236 +++++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 240 +++++++---------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 252 
+++++++----------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 240 +++++++---------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 236 +++++++--------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 236 +++++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 236 +++++++--------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 248 +++++++---------- .../log_gqttq_mad_d_inl0_hrd1.txt | 240 +++++++---------- .../log_gqttq_mad_f_inl0_hrd0.txt | 236 +++++++--------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 248 +++++++---------- .../log_gqttq_mad_f_inl0_hrd1.txt | 236 +++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 236 +++++++--------- .../log_gqttq_mad_m_inl0_hrd1.txt | 236 +++++++--------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 221 +++++++-------- .../log_heftggbb_mad_d_inl0_hrd1.txt | 219 +++++++-------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 221 +++++++-------- .../log_heftggbb_mad_f_inl0_hrd1.txt | 221 +++++++-------- .../log_heftggbb_mad_m_inl0_hrd0.txt | 221 +++++++-------- .../log_heftggbb_mad_m_inl0_hrd1.txt | 223 +++++++--------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 236 +++++++--------- .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 236 +++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 236 +++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 236 +++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 236 +++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 236 +++++++--------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 219 +++++++-------- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 219 +++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 215 +++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 215 +++++++-------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 217 +++++++-------- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 219 +++++++-------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 221 +++++++-------- .../log_susyggtt_mad_d_inl0_hrd1.txt | 219 +++++++-------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 221 +++++++-------- .../log_susyggtt_mad_f_inl0_hrd1.txt | 219 
+++++++-------- .../log_susyggtt_mad_m_inl0_hrd0.txt | 221 +++++++-------- .../log_susyggtt_mad_m_inl0_hrd1.txt | 221 +++++++-------- 102 files changed, 9379 insertions(+), 13733 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 8f9275e4c9..d18921ee68 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:32:11 +DATE: 2024-05-16_14:54:13 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.832141e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.963737e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.194654e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.663591 sec -INFO: No Floating Point Exceptions have been reported - 2,552,888,166 cycles # 2.852 GHz - 3,988,214,096 instructions # 1.56 insn per cycle - 0.955666761 seconds 
time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.053499e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.235255e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.235255e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.389071 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.427849e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.646988e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.646988e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.026503 sec INFO: No Floating Point Exceptions have been reported - 18,286,976,651 cycles # 2.860 GHz - 43,966,894,713 instructions # 2.40 insn per cycle - 6.394304827 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 17,588,520,722 cycles # 3.497 GHz + 44,027,100,031 instructions # 2.50 insn per cycle + 5.029781350 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 435) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.575090e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.062276e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.062276e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 4.417510 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.245017e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.898388e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.898388e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.409137 sec INFO: No Floating Point Exceptions have been reported - 12,744,894,916 cycles # 2.883 GHz - 31,001,019,523 instructions # 2.43 insn per cycle - 4.422588286 seconds time elapsed + 11,925,966,926 cycles # 3.496 GHz + 30,984,049,975 instructions # 2.60 insn per cycle + 3.412125095 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.946862e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.717949e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.717949e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.650052 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.722325e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.688825e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.688825e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.921646 sec INFO: No Floating Point Exceptions have been reported - 10,049,299,034 cycles # 2.750 GHz - 19,366,983,583 instructions # 1.93 insn per cycle - 3.655131055 seconds time elapsed + 9,887,221,571 cycles # 3.382 GHz + 19,250,563,361 instructions # 1.95 insn per cycle + 2.924502262 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.022084e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.853820e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.853820e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.526182 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.802769e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.833281e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.833281e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.834217 sec INFO: No Floating Point Exceptions have been reported - 9,727,313,452 cycles # 2.755 GHz - 18,976,774,064 instructions # 1.95 insn per cycle - 3.531366474 seconds time elapsed + 9,591,849,238 cycles # 3.382 GHz + 18,670,168,361 instructions # 1.95 insn per cycle + 2.836767218 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.695166e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.233065e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.233065e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 4.130967 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.517779e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.263169e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.263169e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.379153 sec INFO: No Floating Point Exceptions have been reported - 8,580,931,991 cycles # 2.075 GHz - 15,727,945,386 instructions # 1.83 insn per cycle - 4.136130895 seconds time elapsed + 8,086,400,503 cycles # 3.395 GHz + 15,407,515,769 instructions # 1.91 insn per cycle + 2.381976161 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 556a164c58..02c21aaf1a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,227 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:59:51 +DATE: 2024-05-16_15:12:05 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.482485e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.592798e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.592798e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.321030 sec -INFO: No Floating Point Exceptions have been reported - 7,329,722,152 cycles # 2.840 GHz - 13,178,162,400 instructions # 1.80 insn per cycle - 2.637544426 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.015910e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.185158e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.185158e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.819356 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.385573e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.591799e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.591799e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.240714 sec INFO: No Floating Point Exceptions have been reported - 19,469,024,508 cycles # 2.853 GHz - 44,194,459,972 instructions # 2.27 insn per cycle - 6.826135735 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 18,348,617,166 cycles # 3.499 GHz + 44,271,880,648 instructions # 2.41 insn per cycle + 5.244548893 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 435) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.484528e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.911785e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.911785e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.870031 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.133902e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.717359e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.717359e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.714837 sec INFO: No Floating Point Exceptions have been reported - 13,928,289,994 cycles # 2.857 GHz - 31,840,505,402 instructions # 2.29 insn per cycle - 4.876819018 seconds time elapsed + 13,006,962,004 cycles # 3.498 GHz + 31,804,543,831 instructions # 2.45 insn per cycle + 3.718659657 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.770728e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.386706e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.386706e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.194561 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.547917e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.391599e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.391599e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.241645 sec INFO: No Floating Point Exceptions have been reported - 11,361,909,372 cycles # 2.705 GHz - 20,728,193,515 instructions # 1.82 insn per cycle - 4.201564491 seconds time elapsed + 10,997,993,916 cycles # 3.390 GHz + 20,589,341,691 instructions # 1.87 insn per cycle + 3.245518188 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.837895e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.511052e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.511052e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.060412 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.629666e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.521126e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.521126e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.136875 sec INFO: No Floating Point Exceptions have been reported - 10,967,372,142 cycles # 2.697 GHz - 20,348,024,135 instructions # 1.86 insn per cycle - 4.067336299 seconds time elapsed + 10,652,887,194 cycles # 3.392 GHz + 20,009,865,369 instructions # 1.88 insn per cycle + 3.140687025 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.574771e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.036722e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.036722e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.642745 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.236908e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.670822e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.670822e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.697812 sec INFO: No Floating Point Exceptions have been reported - 9,876,735,144 cycles # 2.125 GHz - 16,873,564,045 instructions # 1.71 insn per cycle - 4.649693422 seconds time elapsed + 9,210,173,022 cycles # 3.409 GHz + 16,530,589,084 instructions # 1.79 insn per cycle + 2.701650823 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 752636bf13..04d239f813 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:10:56 +DATE: 2024-05-16_15:16:16 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.514734e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.592124e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.118434e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.385277 sec -INFO: No Floating Point Exceptions have been reported - 4,585,820,337 cycles # 2.836 GHz - 7,177,605,134 instructions # 1.57 insn per cycle - 1.675534023 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 
tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.053610e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.236408e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.236408e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.412133e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.628068e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.628068e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.764205 sec +TOTAL : 5.073081 sec INFO: No Floating Point Exceptions have been reported - 19,353,708,257 cycles # 2.859 GHz - 44,070,957,602 instructions # 2.28 insn per cycle - 6.769682162 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 17,747,378,428 cycles # 3.497 GHz + 44,027,757,986 instructions # 2.48 insn per cycle + 5.075907933 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 435) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.538542e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.015186e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.015186e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.249979e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.900771e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.900771e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.882143 sec +TOTAL : 3.402943 sec INFO: No Floating Point Exceptions have been reported - 13,858,733,837 cycles # 2.836 GHz - 31,001,638,282 instructions # 2.24 insn per cycle - 4.887574523 seconds time elapsed + 11,912,110,688 cycles # 3.498 GHz + 30,984,203,166 instructions # 2.60 insn per cycle + 3.405716771 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.916460e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.668694e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.668694e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.718087e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.685398e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.685398e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.073472 sec +TOTAL : 2.927995 sec INFO: No Floating Point Exceptions have been reported - 11,156,313,503 cycles # 2.736 GHz - 19,267,334,271 instructions # 1.73 insn per cycle - 4.078862770 seconds time elapsed + 9,910,289,470 cycles # 3.382 GHz + 19,251,026,310 instructions # 1.94 insn per cycle + 2.930797627 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.001009e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.815270e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.815270e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.791935e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.810280e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.810280e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.939832 sec +TOTAL : 2.840901 sec INFO: No Floating Point Exceptions have been reported - 10,815,664,939 cycles # 2.742 GHz - 18,691,798,772 instructions # 1.73 insn per cycle - 3.945208768 seconds time elapsed + 9,615,157,995 cycles # 3.382 GHz + 18,672,256,452 instructions # 1.94 insn per cycle + 2.843647429 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.659550e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.188571e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.188571e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.523847e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.275101e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.275101e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.600299 sec +TOTAL : 2.374393 sec INFO: No Floating Point Exceptions have been reported - 9,711,022,403 cycles # 2.112 GHz - 15,432,876,214 instructions # 1.59 insn per cycle - 4.605929662 seconds time elapsed + 8,074,601,724 cycles # 3.397 GHz + 15,407,700,600 instructions # 1.91 insn per cycle + 2.377184825 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 55f8e65d60..5f9b7e499d 100644 --- 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -1,214 +1,128 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:08:14 +DATE: 2024-05-16_15:16:04 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.528786e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.598045e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.123971e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.009724 sec -INFO: No Floating Point Exceptions have been reported - 3,524,520,771 cycles # 2.832 GHz - 6,986,852,382 instructions # 1.98 insn per cycle - 1.303213412 
seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP 
[gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.055666e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.238246e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.238246e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.381413 sec -INFO: No Floating Point Exceptions have been reported - 18,267,573,257 cycles # 2.861 GHz - 43,966,026,516 instructions # 2.41 insn per cycle - 6.386884750 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe: Aborted + 4,227,660 cycles # 3.245 GHz + 6,316,211 instructions # 1.49 insn per cycle + 0.041237880 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 435) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.556382e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.037173e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.037173e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.468246 sec -INFO: No Floating Point Exceptions have been reported - 12,780,247,489 cycles # 2.858 GHz - 30,998,946,765 instructions # 2.43 insn per cycle - 4.473714210 seconds time elapsed +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe: Aborted + 4,179,583 cycles # 3.239 GHz + 6,352,819 instructions # 1.52 insn per cycle + 0.038963988 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.920501e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.673029e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.673029e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.698478 sec -INFO: No Floating Point Exceptions have been reported - 10,077,708,073 cycles # 2.723 GHz - 19,366,955,499 instructions # 1.92 insn per cycle - 3.704491612 seconds time elapsed +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe: Aborted + 4,172,850 cycles # 2.620 GHz + 6,332,258 instructions # 1.52 insn per cycle + 0.039710095 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.006263e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.826924e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.826924e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.554344 sec -INFO: No Floating Point Exceptions have been reported - 9,708,097,650 cycles # 2.728 GHz - 18,987,540,468 instructions # 1.96 insn per cycle - 3.559725957 seconds time elapsed +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe: Aborted + 3,973,250 cycles # 2.596 GHz + 6,339,390 instructions # 1.60 insn per cycle + 0.038745082 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.673906e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.201263e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.201263e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.180852 sec -INFO: No Floating Point Exceptions have been reported - 8,572,399,884 cycles # 2.048 GHz - 15,727,509,673 instructions # 1.83 insn per cycle - 4.186463799 seconds time elapsed +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe: Aborted + 3,997,344 cycles # 2.695 GHz + 6,335,669 instructions # 1.58 insn per cycle + 0.038904403 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 8320028620..3524907bed 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,216 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:05:29 +DATE: 2024-05-16_15:14:40 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.845001e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.545626e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.012755e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.945484 sec -INFO: No Floating Point Exceptions have been reported - 6,182,952,030 cycles # 2.840 GHz - 11,472,065,600 instructions # 1.86 insn per cycle - 2.234065267 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.053191e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.234940e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.234940e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.394281 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.410654e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.627660e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.627660e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.074615 sec INFO: No Floating Point Exceptions have been reported - 18,294,474,856 cycles # 2.859 GHz - 43,971,000,114 instructions # 2.40 insn per cycle - 6.399562206 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 17,749,993,456 cycles # 3.496 GHz + 44,027,344,775 instructions # 2.48 insn per cycle + 5.077371844 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 435) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.557791e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.040989e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.040989e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.463787 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.248368e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.898949e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.898949e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.408286 sec INFO: No Floating Point Exceptions have been reported - 12,772,778,524 cycles # 2.859 GHz - 30,998,712,334 instructions # 2.43 insn per cycle - 4.469196075 seconds time elapsed + 11,929,874,073 cycles # 3.498 GHz + 30,983,626,329 instructions # 2.60 insn per cycle + 3.411178393 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.925684e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.680642e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.680642e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.688587 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.717188e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.684291e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.684291e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.928745 sec INFO: No Floating Point Exceptions have been reported - 10,072,112,495 cycles # 2.727 GHz - 19,365,616,714 instructions # 1.92 insn per cycle - 3.694022814 seconds time elapsed + 9,914,485,778 cycles # 3.382 GHz + 19,250,668,720 instructions # 1.94 insn per cycle + 2.931625971 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.999182e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.825411e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.825411e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.565445 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.799056e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.830181e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.830181e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.843755 sec INFO: No Floating Point Exceptions have been reported - 9,737,532,909 cycles # 2.728 GHz - 18,976,607,709 instructions # 1.95 insn per cycle - 3.570830090 seconds time elapsed + 9,625,840,270 cycles # 3.382 GHz + 18,671,694,962 instructions # 1.94 insn per cycle + 2.846615920 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.671566e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.197159e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.197159e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.185653 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.525745e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.268323e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.268323e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.373981 sec INFO: No Floating Point Exceptions have been reported - 8,570,262,444 cycles # 2.045 GHz - 15,727,819,138 instructions # 1.84 insn per cycle - 4.191109662 seconds time elapsed + 8,072,986,024 cycles # 3.397 GHz + 15,407,698,309 instructions # 1.91 insn per cycle + 2.376793413 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 6aa3de3ecf..c549d3b329 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:32:42 +DATE: 2024-05-16_14:54:31 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.832072e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.951586e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.177922e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.666506 sec -INFO: No Floating Point Exceptions have been reported - 2,570,661,306 cycles # 2.841 GHz - 3,994,547,928 instructions # 1.55 insn per cycle - 0.967516454 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] 
(23) = ( 1.115974e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.322257e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.322257e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.054676 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.528086e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.781237e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.781237e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.730345 sec INFO: No Floating Point Exceptions have been reported - 17,515,565,744 cycles # 2.891 GHz - 41,813,477,100 instructions # 2.39 insn per cycle - 6.059803806 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) + 16,557,247,391 cycles # 3.498 GHz + 41,872,984,696 instructions # 2.53 insn per cycle + 4.733179835 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 394) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.620797e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.138518e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.138518e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 4.301844 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.287098e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.964780e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.964780e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.357673 sec INFO: No Floating Point Exceptions have been reported - 12,450,766,554 cycles # 2.891 GHz - 30,161,114,565 instructions # 2.42 insn per cycle - 4.307292943 seconds time elapsed + 11,740,376,151 cycles # 3.495 GHz + 30,144,486,447 instructions # 2.57 insn per cycle + 3.360151754 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1612) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.953305e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.731201e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.731201e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.641578 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.767086e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.781022e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.781022e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.887003 sec INFO: No Floating Point Exceptions have been reported - 9,958,194,708 cycles # 2.732 GHz - 19,097,340,022 instructions # 1.92 insn per cycle - 3.646748223 seconds time elapsed + 9,774,543,201 cycles # 3.383 GHz + 18,982,650,002 instructions # 1.94 insn per cycle + 2.889874818 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1931) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.030144e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.871070e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.871070e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.516336 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.831390e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.882164e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.882164e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.810899 sec INFO: No Floating Point Exceptions have been reported - 9,615,329,857 cycles # 2.731 GHz - 18,757,197,169 instructions # 1.95 insn per cycle - 3.521486960 seconds time elapsed + 9,514,166,235 cycles # 3.382 GHz + 18,441,143,108 instructions # 1.94 insn per cycle + 2.813727674 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1662) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.716452e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.276447e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.276447e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 4.085042 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.536310e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.303726e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.303726e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.369421 sec INFO: No Floating Point Exceptions have been reported - 8,419,965,935 cycles # 2.059 GHz - 15,604,092,420 instructions # 1.85 insn per cycle - 4.090208290 seconds time elapsed + 8,055,861,241 cycles # 3.397 GHz + 15,282,458,066 instructions # 1.90 insn per cycle + 2.372003109 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 887) (512y: 156) (512z: 1239) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index b8b45776b1..881ad88f88 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:50:17 +DATE: 2024-05-16_15:06:32 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.702175e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.710921e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.156854e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.694390 sec -INFO: No Floating Point Exceptions have been reported - 2,605,646,002 cycles # 2.805 GHz - 4,054,878,448 instructions # 1.56 insn per cycle - 0.990293563 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] 
(23) = ( 1.570616e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.013848e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.013848e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.427060 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.175343e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.727679e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.727679e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.506144 sec INFO: No Floating Point Exceptions have been reported - 12,653,335,495 cycles # 2.855 GHz - 32,508,582,789 instructions # 2.57 insn per cycle - 4.432506316 seconds time elapsed + 12,274,429,099 cycles # 3.498 GHz + 32,594,760,258 instructions # 2.66 insn per cycle + 3.509011976 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.997942e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.867798e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.867798e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.572733 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.906763e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.092678e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.092678e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.770319 sec INFO: No Floating Point Exceptions have been reported - 10,224,022,422 cycles # 2.858 GHz - 24,474,305,392 instructions # 2.39 insn per cycle - 3.578147466 seconds time elapsed + 9,696,571,030 cycles # 3.497 GHz + 24,460,006,230 instructions # 2.52 insn per cycle + 2.773117767 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.166030e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.174429e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.174429e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.325864 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.991915e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.206251e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.206251e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.713535 sec INFO: No Floating Point Exceptions have been reported - 9,098,194,590 cycles # 2.732 GHz - 16,922,780,551 instructions # 1.86 insn per cycle - 3.331363940 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) + 9,202,322,106 cycles # 3.388 GHz + 16,771,044,980 instructions # 1.82 insn per cycle + 2.716375138 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1623) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.212975e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.265291e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.265291e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.263243 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.294367e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.822201e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.822201e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.504424 sec INFO: No Floating Point Exceptions have been reported - 8,899,581,855 cycles # 2.723 GHz - 16,332,700,862 instructions # 1.84 insn per cycle - 3.268811314 seconds time elapsed + 8,765,996,140 cycles # 3.497 GHz + 16,017,103,137 instructions # 1.83 insn per cycle + 2.507273198 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.881513e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.573087e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.573087e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.766130 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.713472e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.714893e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.714893e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.285340 sec INFO: No Floating Point Exceptions have been reported - 7,870,004,063 cycles # 2.087 GHz - 14,582,523,760 instructions # 1.85 insn per cycle - 3.771527980 seconds time elapsed + 7,781,310,210 cycles # 3.401 GHz + 14,263,612,829 instructions # 1.83 insn per cycle + 2.288259219 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 36ca3a055a..15458a5536 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:50:44 +DATE: 2024-05-16_15:06:47 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.703728e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.728666e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.213805e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.683308 sec -INFO: No Floating Point Exceptions have been reported - 2,600,218,993 cycles # 2.827 GHz - 4,020,842,023 instructions # 1.55 insn per cycle - 0.979103636 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] 
(23) = ( 2.089739e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.950650e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.950650e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.429960 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.961070e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.108053e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.108053e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.740479 sec INFO: No Floating Point Exceptions have been reported - 9,811,818,087 cycles # 2.857 GHz - 25,388,363,151 instructions # 2.59 insn per cycle - 3.435408237 seconds time elapsed + 9,588,624,171 cycles # 3.496 GHz + 25,475,292,972 instructions # 2.66 insn per cycle + 2.743367209 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.308707e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.576242e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.576242e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.150608 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.539656e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.478152e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.478152e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.390239 sec INFO: No Floating Point Exceptions have been reported - 9,017,040,298 cycles # 2.858 GHz - 21,483,572,468 instructions # 2.38 insn per cycle - 3.156151233 seconds time elapsed + 8,371,663,188 cycles # 3.499 GHz + 21,469,158,286 instructions # 2.56 insn per cycle + 2.393122141 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.317514e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.502934e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.502934e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.137476 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.412167e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.091616e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.091616e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.454676 sec INFO: No Floating Point Exceptions have been reported - 8,591,075,632 cycles # 2.734 GHz - 15,811,134,800 instructions # 1.84 insn per cycle - 3.143116597 seconds time elapsed + 8,591,996,487 cycles # 3.496 GHz + 15,696,649,387 instructions # 1.83 insn per cycle + 2.458017389 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.364262e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.604788e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.604788e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.081738 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.589110e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.477157e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.477157e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.343031 sec INFO: No Floating Point Exceptions have been reported - 8,442,051,612 cycles # 2.735 GHz - 15,504,513,991 instructions # 1.84 insn per cycle - 3.087247738 seconds time elapsed + 8,192,410,575 cycles # 3.493 GHz + 15,188,290,742 instructions # 1.85 insn per cycle + 2.345913373 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.995819e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.803385e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.803385e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.575599 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.866424e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.079210e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.079210e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.219053 sec INFO: No Floating Point Exceptions have been reported - 7,560,717,738 cycles # 2.112 GHz - 14,283,918,013 instructions # 1.89 insn per cycle - 3.581217674 seconds time elapsed + 7,562,811,698 cycles # 3.405 GHz + 13,965,657,611 instructions # 1.85 insn per cycle + 2.221942369 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index e6a48e18ea..dbd01f373a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:33:12 +DATE: 2024-05-16_14:54:48 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.602023e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319974e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.288580e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.573413 sec -INFO: No Floating Point Exceptions have been reported - 2,248,961,780 cycles # 2.823 GHz - 3,510,545,687 instructions # 1.56 insn per cycle - 0.854969378 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] 
(23) = ( 1.082006e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.283429e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.283429e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.191643 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.590876e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910010e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910010e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.552360 sec INFO: No Floating Point Exceptions have been reported - 17,740,084,916 cycles # 2.864 GHz - 43,510,870,904 instructions # 2.45 insn per cycle - 6.196630799 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 15,929,481,571 cycles # 3.497 GHz + 43,645,354,190 instructions # 2.74 insn per cycle + 4.555018401 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.223628e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.400190e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.400190e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 
3.270376e-06 ) GeV^0 -TOTAL : 3.211334 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.137313e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.789699e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.789699e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.593393 sec INFO: No Floating Point Exceptions have been reported - 9,244,093,548 cycles # 2.875 GHz - 21,907,620,538 instructions # 2.37 insn per cycle - 3.216400901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,079,136,062 cycles # 3.498 GHz + 21,966,157,575 instructions # 2.42 insn per cycle + 2.595953355 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.419643e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.716778e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.716778e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270341e-06 ) GeV^0 -TOTAL : 2.967957 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.413795e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.060357e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.060357e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.402651 sec INFO: No Floating Point Exceptions have been reported - 8,316,472,651 cycles # 2.798 GHz - 15,592,546,873 instructions # 1.87 insn per cycle - 2.972957823 seconds time elapsed + 8,148,048,804 cycles # 3.389 GHz + 15,492,987,780 instructions # 1.90 insn per cycle + 2.404822845 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.415944e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.716005e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.716005e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270341e-06 ) GeV^0 -TOTAL : 2.973842 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.440410e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.116576e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.116576e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.402030 sec INFO: No Floating Point Exceptions have been reported - 8,315,082,395 cycles # 2.793 GHz - 15,436,266,122 instructions # 1.86 insn per cycle - 2.978955673 seconds time elapsed + 8,153,583,023 cycles # 3.391 GHz + 15,132,463,188 instructions # 1.86 insn per cycle + 2.404639550 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.416813e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.677411e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.677411e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270342e-06 ) GeV^0 -TOTAL : 2.974949 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.526570e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.115568e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.115568e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 1.960575 sec INFO: No Floating Point Exceptions have been reported - 6,610,937,423 cycles # 2.219 GHz - 12,863,752,208 instructions # 1.95 insn per cycle - 2.980091060 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 6,696,963,172 cycles # 3.412 GHz + 12,564,921,515 instructions # 1.88 insn per cycle + 1.963196726 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1738) (512y: 5) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 29c604a610..e292abf6c8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,227 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:00:26 +DATE: 2024-05-16_15:12:25 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.941162e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.420865e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.420865e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.745821 sec -INFO: No Floating Point Exceptions have been reported - 5,608,708,868 cycles # 2.838 GHz - 10,190,752,473 instructions # 1.82 insn per cycle - 2.033892232 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.055357e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.251163e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.251163e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.442554 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.571360e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.882241e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882241e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.648614 sec INFO: No Floating Point Exceptions have been reported - 18,431,262,224 cycles # 2.859 GHz - 43,659,496,470 instructions # 2.37 insn per cycle - 6.448903506 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 16,274,421,129 cycles # 3.499 GHz + 43,795,272,918 instructions # 2.69 insn per cycle + 4.652134666 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.110619e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.160777e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.160777e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.492633 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.041129e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.508515e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.508515e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.757451 sec INFO: No Floating Point Exceptions have been reported - 9,993,500,583 cycles # 2.857 GHz - 23,243,476,984 instructions # 2.33 insn per cycle - 3.498991107 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,657,450,376 cycles # 3.498 GHz + 23,301,227,828 instructions # 2.41 insn per cycle + 2.760901970 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.275264e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.392372e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.392372e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.272099 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.261327e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.737341e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.737341e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.580566 sec INFO: No Floating Point Exceptions have been reported - 9,005,707,266 cycles # 2.748 GHz - 16,711,349,389 instructions # 1.86 insn per cycle - 3.278668519 seconds time elapsed + 8,778,518,065 cycles # 3.398 GHz + 16,616,549,598 instructions # 1.89 insn per cycle + 2.583972383 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.297831e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.443567e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.443567e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.244587 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.285502e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.783778e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.783778e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.577641 sec INFO: No Floating Point Exceptions have been reported - 8,928,752,660 cycles # 2.747 GHz - 16,549,135,089 instructions # 1.85 insn per cycle - 3.250993607 seconds time elapsed + 8,769,810,281 cycles # 3.398 GHz + 16,258,423,929 instructions # 1.85 insn per cycle + 2.581094908 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.241559e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.306249e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.306249e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.324488 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.225378e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.191390e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.191390e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.171708 sec INFO: No Floating Point Exceptions have been reported - 7,378,511,382 cycles # 2.216 GHz - 14,071,008,703 instructions # 1.91 insn per cycle - 3.330848983 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 7,440,255,171 cycles # 3.422 GHz + 13,772,888,504 instructions # 1.85 insn per cycle + 2.175185973 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1738) (512y: 5) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 8016aaf3c8..9f5985de89 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:11:30 +DATE: 2024-05-16_15:16:34 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.314779e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.179276e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.254245e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.213916 sec -INFO: No Floating Point Exceptions have been reported - 4,089,914,869 cycles # 2.847 GHz - 6,594,462,327 instructions # 1.61 insn per cycle - 1.494122889 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 
tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.089350e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.292539e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.292539e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.592846e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910692e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910692e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.490729 sec +TOTAL : 4.548136 sec INFO: No Floating Point Exceptions have been reported - 18,740,706,935 cycles # 2.886 GHz - 43,689,321,367 instructions # 2.33 insn per cycle - 6.495941000 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 15,918,672,194 cycles # 3.498 GHz + 43,645,157,370 instructions # 2.74 insn per cycle + 4.550768739 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.213519e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.407340e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.407340e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.171803e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.793341e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.793341e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.565494 sec +TOTAL : 2.571626 sec INFO: No Floating Point Exceptions have been reported - 10,288,737,724 cycles # 2.883 GHz - 21,988,558,280 instructions # 2.14 insn per cycle - 3.570732391 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,002,835,519 cycles # 3.498 GHz + 21,965,803,400 instructions # 2.44 insn per cycle + 2.574182913 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.392103e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.656361e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.656361e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.395956e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.063603e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.063603e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.347045 sec +TOTAL : 2.415626 sec INFO: No Floating Point Exceptions have been reported - 9,294,224,919 cycles # 2.774 GHz - 15,502,535,760 instructions # 1.67 insn per cycle - 3.352354405 seconds time elapsed + 8,200,472,498 cycles # 3.392 GHz + 15,495,873,799 instructions # 1.89 insn per cycle + 2.418229335 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe 
-p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.411853e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.706271e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.706271e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.442464e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.112321e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.112321e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.334245 sec +TOTAL : 2.398280 sec INFO: No Floating Point Exceptions have been reported - 9,229,877,586 cycles # 2.765 GHz - 15,144,508,612 instructions # 1.64 insn per cycle - 3.339505215 seconds time elapsed + 8,137,879,411 cycles # 3.391 GHz + 15,137,769,043 instructions # 1.86 insn per cycle + 2.400787686 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.389642e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.623022e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.623022e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.518730e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 8.107954e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.107954e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.358884 sec +TOTAL : 1.966307 sec INFO: No Floating Point Exceptions have been reported - 7,623,474,420 cycles # 2.266 GHz - 12,573,351,599 instructions # 1.65 insn per cycle - 3.364654068 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 6,715,271,102 cycles # 3.411 GHz + 12,564,745,941 instructions # 1.87 insn per cycle + 1.968945583 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1738) (512y: 5) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 3bd2ee01ac..406665424a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -1,214 +1,128 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:08:45 +DATE: 2024-05-16_15:16:05 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.323461e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.185145e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.269757e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.877821 sec -INFO: No Floating Point Exceptions have been reported - 3,113,911,295 cycles # 2.829 GHz - 6,352,740,713 instructions # 2.04 insn per cycle - 1.157340966 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 
tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.080933e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.281840e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.281840e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.198018 sec -INFO: No Floating Point Exceptions have been reported - 
17,744,200,571 cycles # 2.861 GHz - 43,507,633,337 instructions # 2.45 insn per cycle - 6.203254296 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe: Aborted + 4,006,756 cycles # 2.641 GHz + 6,337,589 instructions # 1.58 insn per cycle + 0.038820558 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.206413e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.379725e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.379725e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.236598 sec -INFO: No Floating Point Exceptions have been reported - 9,264,626,353 cycles # 2.859 GHz - 21,909,129,569 instructions # 2.36 insn per cycle - 3.241826343 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) 
+runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe: Aborted + 4,042,828 cycles # 2.599 GHz + 6,349,685 instructions # 1.57 insn per cycle + 0.039473651 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.378945e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.622195e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.622195e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.023073 sec -INFO: No Floating Point Exceptions have been reported - 8,285,944,372 cycles # 2.737 GHz - 15,591,046,995 instructions # 1.88 insn per cycle - 3.028351636 seconds time elapsed +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe: Aborted + 3,980,821 cycles # 2.593 GHz + 6,345,323 instructions # 1.59 insn per cycle + 0.040604688 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.406944e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.689990e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.689990e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.991499 sec -INFO: No Floating Point Exceptions have been reported - 8,215,974,247 cycles # 2.742 GHz - 15,434,394,808 instructions # 1.88 insn per cycle - 2.996736921 seconds time elapsed +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe: Aborted + 3,992,443 cycles # 2.637 GHz + 6,345,359 instructions # 1.59 insn per cycle + 0.039143335 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.365448e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.582442e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.582442e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.039216 sec -INFO: No Floating Point Exceptions have been reported - 6,609,003,865 cycles # 2.172 GHz - 12,863,939,056 instructions # 1.95 insn per cycle - 3.044504803 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe: Aborted + 3,962,677 cycles # 2.635 GHz + 6,339,192 instructions # 1.60 insn per cycle + 0.040340396 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1738) (512y: 5) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index ef6806658f..fed66850ae 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,216 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:06:01 +DATE: 2024-05-16_15:14:58 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.717298e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.145539e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.143407e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.533112 sec -INFO: No Floating Point Exceptions have been reported - 4,980,418,158 cycles # 2.839 GHz - 9,119,342,139 instructions # 1.83 insn per cycle - 1.812784805 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.069115e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.265540e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.265540e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.262605 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.589425e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.907636e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907636e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.558643 sec INFO: No Floating Point Exceptions have been reported - 17,921,464,120 cycles # 2.860 GHz - 43,508,155,770 instructions # 2.43 insn per cycle - 6.267871711 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 15,933,699,286 cycles # 3.494 GHz + 43,645,740,209 instructions # 2.74 insn per cycle + 4.561263957 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.205993e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.384022e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.384022e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.237394 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.166265e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.789294e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.789294e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.576762 sec INFO: No Floating Point Exceptions have been reported - 9,270,351,681 cycles # 2.860 GHz - 21,907,147,046 instructions # 2.36 insn per cycle - 3.242634203 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,021,421,888 cycles # 3.498 GHz + 21,965,682,086 instructions # 2.43 insn per cycle + 2.579422110 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.379256e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.625966e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.625966e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.019248 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.410624e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.067194e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.067194e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.407835 sec INFO: No Floating Point Exceptions have been reported - 8,297,873,717 cycles # 2.744 GHz - 15,590,905,283 instructions # 1.88 insn per cycle - 3.024598202 seconds time elapsed + 8,170,522,097 cycles # 3.390 GHz + 15,495,739,902 instructions # 1.90 insn per cycle + 2.410473326 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.398987e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.671948e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.671948e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.000824 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.440773e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.107588e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.107588e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.400218 sec INFO: No Floating Point Exceptions have been reported - 8,223,361,227 cycles # 2.736 GHz - 15,434,590,756 instructions # 1.88 insn per cycle - 3.006023707 seconds time elapsed + 8,147,236,501 cycles # 3.391 GHz + 15,137,712,310 instructions # 1.86 insn per cycle + 2.402812237 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.360241e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.570739e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.570739e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.046199 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.529243e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.119603e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.119603e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 1.960262 sec INFO: No Floating Point Exceptions have been reported - 6,615,724,908 cycles # 2.169 GHz - 12,863,710,849 instructions # 1.94 insn per cycle - 3.051492012 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 6,697,446,385 cycles # 3.413 GHz + 12,564,565,403 instructions # 1.88 insn per cycle + 1.962895171 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1738) (512y: 5) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index b613786442..f38625daf9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:33:39 +DATE: 2024-05-16_14:55:04 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.604585e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.336536e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.343718e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.571057 sec -INFO: No Floating Point Exceptions have been reported - 2,242,744,669 cycles # 2.822 GHz - 3,531,920,926 instructions # 1.57 insn per cycle - 0.851832101 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] 
(23) = ( 1.153085e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.384746e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.384746e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.830325 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.718462e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.096695e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.096695e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.259405 sec INFO: No Floating Point Exceptions have been reported - 16,691,813,815 cycles # 2.861 GHz - 41,266,181,474 instructions # 2.47 insn per cycle - 5.835359179 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) + 14,906,619,571 cycles # 3.498 GHz + 41,403,712,152 instructions # 2.78 insn per cycle + 4.261977624 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 377) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.304367e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.587722e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.587722e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 
3.270376e-06 ) GeV^0 -TOTAL : 3.109672 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.281651e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.052640e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.052640e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.507101 sec INFO: No Floating Point Exceptions have been reported - 8,995,426,679 cycles # 2.889 GHz - 21,211,089,826 instructions # 2.36 insn per cycle - 3.114839321 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1843) (avx2: 0) (512y: 0) (512z: 0) + 8,776,093,343 cycles # 3.497 GHz + 21,269,495,730 instructions # 2.42 insn per cycle + 2.509732736 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1845) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.420820e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.716893e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.716893e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270341e-06 ) GeV^0 -TOTAL : 2.969606 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.439328e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.118918e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.118918e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.392823 sec INFO: No Floating Point Exceptions have been reported - 8,272,952,138 cycles # 2.782 GHz - 15,425,102,157 instructions # 1.86 insn per cycle - 2.974640700 seconds time elapsed + 8,121,995,124 cycles # 3.391 GHz + 15,330,104,720 instructions # 1.89 insn per cycle + 2.395419581 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2537) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.475869e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.854556e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.854556e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270341e-06 ) GeV^0 -TOTAL : 2.912175 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.462402e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.159225e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.159225e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.389124 sec INFO: No Floating Point Exceptions have been reported - 8,117,590,540 cycles # 2.783 GHz - 15,233,342,033 instructions # 1.88 insn per cycle - 2.917189383 seconds time elapsed + 8,111,785,966 cycles # 3.391 GHz + 14,942,135,580 instructions # 1.84 insn per cycle + 2.392160016 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.412788e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.668874e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.668874e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270342e-06 ) GeV^0 -TOTAL : 2.982256 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.526160e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.104377e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.104377e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 1.963768 sec INFO: No Floating Point Exceptions have been reported - 6,592,409,084 cycles # 2.208 GHz - 12,843,659,599 instructions # 1.95 insn per cycle - 2.987368722 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1706) (512y: 18) (512z: 1427) + 6,690,632,313 cycles # 3.407 GHz + 12,541,712,444 instructions # 1.87 insn per cycle + 1.965941647 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1708) (512y: 6) (512z: 1427) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052564145764E-002 Relative difference = 1.9988585667912256e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index e6d46e5965..79dfc35e39 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:51:09 +DATE: 2024-05-16_15:07:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.307514e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.195447e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.293637e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.586214 sec -INFO: No Floating Point Exceptions have been reported - 2,288,759,129 cycles # 2.822 GHz - 3,563,945,826 instructions # 1.56 insn per cycle - 0.869586754 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] 
(23) = ( 1.594420e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.075669e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.075669e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 4.325231 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.226642e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.906339e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.906339e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 3.422836 sec INFO: No Floating Point Exceptions have been reported - 12,164,411,956 cycles # 2.810 GHz - 32,427,707,417 instructions # 2.67 insn per cycle - 4.330470336 seconds time elapsed + 11,973,671,245 cycles # 3.497 GHz + 32,591,128,424 instructions # 2.72 insn per cycle + 3.425270504 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039840314887E-002 Relative difference = 1.244813035273009e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.607791e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.429113e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.429113e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 
3.270376e-06 ) GeV^0 -TOTAL : 2.796823 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.726722e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.217994e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.217994e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.277045 sec INFO: No Floating Point Exceptions have been reported - 8,009,111,157 cycles # 2.859 GHz - 18,657,618,704 instructions # 2.33 insn per cycle - 2.802139139 seconds time elapsed + 7,973,162,968 cycles # 3.498 GHz + 18,723,001,222 instructions # 2.35 insn per cycle + 2.279715985 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1555) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039283704129E-002 Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.719759e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.472647e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.472647e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270341e-06 ) GeV^0 -TOTAL : 2.690741 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.859329e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.134184e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.134184e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.190870 sec INFO: No Floating Point Exceptions have been reported - 7,416,864,109 cycles # 2.752 GHz - 14,251,974,045 instructions # 1.92 insn per cycle - 2.696083346 seconds time elapsed + 7,454,401,193 cycles # 3.399 GHz + 14,160,226,764 instructions # 1.90 insn per cycle + 2.193513000 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053244447801E-002 Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.774877e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.630440e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.630440e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270341e-06 ) GeV^0 -TOTAL : 2.645076 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.090151e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.687076e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.687076e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.109079 sec INFO: No Floating Point Exceptions have been reported - 7,291,130,406 cycles # 2.752 GHz - 13,948,384,567 instructions # 1.91 insn per cycle - 2.650598467 seconds time elapsed + 7,374,925,228 cycles # 3.493 GHz + 13,649,605,338 instructions # 1.85 insn per cycle + 2.111734544 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053244447801E-002 Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.434115e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.741003e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.741003e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270342e-06 ) GeV^0 -TOTAL : 2.962177 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.513682e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.047803e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.047803e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 1.965310 sec INFO: No Floating Point Exceptions have been reported - 6,479,327,720 cycles # 2.184 GHz - 13,423,401,797 instructions # 2.07 insn per cycle - 2.967420151 seconds time elapsed + 6,714,526,284 cycles # 3.412 GHz + 13,128,070,975 instructions # 1.96 insn per cycle + 1.967953731 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052562326775E-002 Relative difference = 1.997440588685788e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 1851f3246c..4d1949ef99 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:51:33 +DATE: 2024-05-16_15:07:14 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.308899e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.200904e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.329787e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.585570 sec -INFO: No Floating Point Exceptions have been reported - 2,293,480,451 cycles # 2.820 GHz - 3,552,176,680 instructions # 1.55 insn per cycle - 0.870100804 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] 
(23) = ( 2.129885e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.080551e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.080551e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.330346 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.050290e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.497444e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.497444e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 2.660655 sec INFO: No Floating Point Exceptions have been reported - 9,425,530,261 cycles # 2.826 GHz - 25,263,309,757 instructions # 2.68 insn per cycle - 3.335509619 seconds time elapsed + 9,305,990,245 cycles # 3.495 GHz + 25,428,917,364 instructions # 2.73 insn per cycle + 2.663311168 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039838495897E-002 Relative difference = 1.2589928273811243e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.953227e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.493970e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.493970e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 
3.270376e-06 ) GeV^0 -TOTAL : 2.514743 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.336793e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.087513e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.087513e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.041698 sec INFO: No Floating Point Exceptions have been reported - 7,199,101,915 cycles # 2.858 GHz - 16,870,111,415 instructions # 2.34 insn per cycle - 2.520226033 seconds time elapsed + 7,143,134,191 cycles # 3.495 GHz + 16,934,497,640 instructions # 2.37 insn per cycle + 2.044415863 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.869871e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.903620e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.903620e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270341e-06 ) GeV^0 -TOTAL : 2.568153 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.301147e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.295276e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.295276e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.025295 sec INFO: No Floating Point Exceptions have been reported - 7,089,400,745 cycles # 2.756 GHz - 13,616,924,187 instructions # 1.92 insn per cycle - 2.573571442 seconds time elapsed + 7,078,862,679 cycles # 3.491 GHz + 13,524,937,850 instructions # 1.91 insn per cycle + 2.027915407 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053220800939E-002 Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.911116e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.034757e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.034757e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270341e-06 ) GeV^0 -TOTAL : 2.534380 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.391243e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.540215e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.540215e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.004933 sec INFO: No Floating Point Exceptions have been reported - 7,042,060,221 cycles # 2.774 GHz - 13,426,671,587 instructions # 1.91 insn per cycle - 2.539847169 seconds time elapsed + 7,005,576,991 cycles # 3.491 GHz + 13,133,179,972 instructions # 1.87 insn per cycle + 2.007248760 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053220800939E-002 Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.525985e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.967300e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.967300e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270342e-06 ) GeV^0 -TOTAL : 2.871748 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.666850e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.557261e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.557261e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 1.919696 sec INFO: No Floating Point Exceptions have been reported - 6,325,625,286 cycles # 2.199 GHz - 13,154,721,049 instructions # 2.08 insn per cycle - 2.877120825 seconds time elapsed + 6,563,329,807 cycles # 3.415 GHz + 12,859,576,357 instructions # 1.96 insn per cycle + 1.922379873 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052536860923E-002 Relative difference = 1.977588895209662e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index b626a014f8..d9c97c18b0 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:34:05 +DATE: 2024-05-16_14:55:19 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.830025e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.944832e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.160865e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.662041 sec -INFO: No Floating Point Exceptions have been reported - 2,559,219,510 cycles # 2.861 GHz - 3,969,506,530 instructions # 1.55 insn per cycle - 0.952802853 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) 
= ( 1.044346e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.221236e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.221236e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.439706 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.378447e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.583594e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.583594e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.178071 sec INFO: No Floating Point Exceptions have been reported - 18,648,827,254 cycles # 2.894 GHz - 44,218,351,924 instructions # 2.37 insn per cycle - 6.444755062 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) + 18,116,948,400 cycles # 3.497 GHz + 44,279,589,774 instructions # 2.44 insn per cycle + 5.180853981 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 441) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.634240e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.158489e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.158489e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 4.270020 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.281421e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.956307e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.956307e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.366088 sec INFO: No Floating Point Exceptions have been reported - 12,337,216,169 cycles # 2.886 GHz - 30,918,100,190 instructions # 2.51 insn per cycle - 4.275170664 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) + 11,777,222,914 cycles # 3.497 GHz + 30,827,112,234 instructions # 2.62 insn per cycle + 3.368562047 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1691) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.943703e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.696046e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.696046e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.651391 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.736561e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.717816e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.717816e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.909518 sec INFO: No Floating Point Exceptions have been reported - 10,097,284,751 cycles # 2.762 GHz - 19,374,074,587 instructions # 1.92 insn per cycle - 3.656592402 seconds time elapsed + 9,848,016,253 cycles # 3.382 GHz + 19,171,423,006 instructions # 1.95 insn per cycle + 2.912299252 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2130) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.039225e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.880994e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.880994e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.498933 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.854841e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.934985e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.934985e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.797845 sec INFO: No Floating Point Exceptions have been reported - 9,699,890,764 cycles # 2.769 GHz - 18,944,296,026 instructions # 1.95 insn per cycle - 3.504313379 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1860) (512y: 188) (512z: 0) + 9,472,574,926 cycles # 3.383 GHz + 18,622,411,462 instructions # 1.97 insn per cycle + 2.800624578 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1863) (512y: 188) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.766168e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.359450e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.359450e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.977432 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.630799e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.513671e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.513671e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.322462 sec INFO: No Floating Point Exceptions have been reported - 8,362,626,878 cycles # 2.101 GHz - 15,058,722,791 instructions # 1.80 insn per cycle - 3.982532855 seconds time elapsed + 7,905,195,508 cycles # 3.400 GHz + 14,645,416,360 instructions # 1.85 insn per cycle + 2.325199959 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1024) (512y: 155) (512z: 1316) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index f9780717c1..65b806c1d2 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:34:35 +DATE: 2024-05-16_14:55:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.831074e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.944999e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.163112e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.663702 sec -INFO: No Floating Point Exceptions have been reported - 2,550,713,530 cycles # 2.845 GHz - 3,995,712,636 instructions # 1.57 insn per cycle - 0.958037940 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) 
= ( 1.088480e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.281697e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.281697e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.193535 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.456800e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.687118e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.687118e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.935224 sec INFO: No Floating Point Exceptions have been reported - 17,967,058,694 cycles # 2.899 GHz - 42,467,805,223 instructions # 2.36 insn per cycle - 6.198684795 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) + 17,240,969,832 cycles # 3.495 GHz + 42,526,916,761 instructions # 2.47 insn per cycle + 4.937746362 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 423) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.676284e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.231904e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.231904e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 4.170684 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.367141e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.103399e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.103399e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.265876 sec INFO: No Floating Point Exceptions have been reported - 12,134,694,075 cycles # 2.906 GHz - 30,224,929,059 instructions # 2.49 insn per cycle - 4.175943490 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) + 11,424,467,425 cycles # 3.496 GHz + 30,110,081,669 instructions # 2.64 insn per cycle + 3.268663149 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1698) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.950812e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.735198e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.735198e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.639278 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.777925e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.800468e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.800468e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.882551 sec INFO: No Floating Point Exceptions have been reported - 10,078,657,444 cycles # 2.766 GHz - 19,257,126,653 instructions # 1.91 insn per cycle - 3.644365244 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) + 9,762,346,673 cycles # 3.384 GHz + 19,123,478,649 instructions # 1.96 insn per cycle + 2.885422638 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2149) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.049769e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.898049e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.898049e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.481211 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.923402e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.059057e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.059057e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.739345 sec INFO: No Floating Point Exceptions have been reported - 9,647,917,970 cycles # 2.768 GHz - 18,746,418,128 instructions # 1.94 insn per cycle - 3.486360008 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1834) (512y: 191) (512z: 0) + 9,280,284,072 cycles # 3.385 GHz + 18,421,211,222 instructions # 1.98 insn per cycle + 2.742071393 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1837) (512y: 191) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.796433e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.409552e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.409552e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.919050 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.654222e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.566246e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.566246e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.310726 sec INFO: No Floating Point Exceptions have been reported - 8,244,471,456 cycles # 2.102 GHz - 14,980,246,059 instructions # 1.82 insn per cycle - 3.924194596 seconds time elapsed + 7,863,763,045 cycles # 3.399 GHz + 14,563,021,097 instructions # 1.85 insn per cycle + 2.313550512 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1021) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 205a4bf5b6..2776c79ca9 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:35:05 +DATE: 2024-05-16_14:55:54 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.201162e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.181610e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.277713e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.525559 sec -INFO: No Floating Point Exceptions have been reported - 2,155,305,398 cycles # 2.849 GHz - 3,120,666,963 instructions # 1.45 insn per cycle - 0.814520269 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.068773e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.129905e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.129905e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.176250 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.789464e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.864452e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.864452e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.886517 sec INFO: No Floating Point Exceptions have been reported - 15,001,077,825 cycles # 2.896 GHz - 38,374,710,401 instructions # 2.56 insn per cycle - 5.181415080 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 13,600,504,995 cycles # 3.497 GHz + 38,380,906,031 instructions # 2.82 insn per cycle + 3.889317912 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 674) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.492980e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.684039e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.684039e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.109528 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.845691e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.093226e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.093226e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.290221 sec INFO: No Floating Point Exceptions have been reported - 9,049,547,879 cycles # 2.906 GHz - 24,578,150,431 instructions # 2.72 insn per cycle - 3.114795475 seconds time elapsed + 8,019,541,175 cycles # 3.498 GHz + 24,571,068,753 instructions # 3.06 insn per cycle + 2.293071476 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.554648e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.034559e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.034559e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.996517 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.867637e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.519099e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.519099e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.460379 sec INFO: No Floating Point Exceptions have been reported - 5,443,502,791 cycles # 2.721 GHz - 11,251,469,346 instructions # 2.07 insn per cycle - 2.001703471 seconds time elapsed + 4,872,147,779 cycles # 3.330 GHz + 11,227,733,615 instructions # 2.30 insn per cycle + 1.463257220 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.119114e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.713742e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.713742e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.821745 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.110279e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.793548e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.793548e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.415836 sec INFO: No Floating Point Exceptions have been reported - 4,960,408,882 cycles # 2.716 GHz - 10,558,806,229 instructions # 2.13 insn per cycle - 1.826903839 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) + 4,723,686,789 cycles # 3.330 GHz + 10,632,343,497 instructions # 2.25 insn per cycle + 1.418646127 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2125) (512y: 124) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.693426e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.898518e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.898518e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.947911 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.311347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.018241e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.018241e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.384756 sec INFO: No Floating Point Exceptions have been reported - 5,367,244,097 cycles # 1.818 GHz - 7,793,958,391 instructions # 1.45 insn per cycle - 2.953294554 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) + 4,617,023,596 cycles # 3.328 GHz + 7,671,857,273 instructions # 1.66 insn per cycle + 1.387628052 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1540) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 4b2366d44f..9f614d2e92 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,227 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:00:55 +DATE: 2024-05-16_15:12:41 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.373758e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.924060e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.924060e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.825705 sec -INFO: No Floating Point Exceptions have been reported - 3,037,157,201 cycles # 2.832 GHz - 4,768,877,833 instructions # 1.57 insn per cycle - 1.128818887 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.032947e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.092197e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.092197e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.344113 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.779713e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.854203e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.854203e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.941033 sec INFO: No Floating Point Exceptions have been reported - 15,315,317,736 cycles # 2.863 GHz - 38,433,762,310 instructions # 2.51 insn per cycle - 5.351126978 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 13,799,747,574 cycles # 3.498 GHz + 38,446,512,754 instructions # 2.79 insn per cycle + 3.944972423 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 674) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.394451e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.578816e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.578816e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.279010 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.784034e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.025706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.025706e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.374177 sec INFO: No Floating Point Exceptions have been reported - 9,390,215,737 cycles # 2.859 GHz - 24,761,602,813 instructions # 2.64 insn per cycle - 3.285914811 seconds time elapsed + 8,313,425,452 cycles # 3.496 GHz + 24,750,069,495 instructions # 2.98 insn per cycle + 2.378016368 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.346272e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.804430e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.804430e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.151067 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.721964e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.347312e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.347312e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.539706 sec INFO: No Floating Point Exceptions have been reported - 5,795,064,676 cycles # 2.687 GHz - 11,538,955,643 instructions # 1.99 insn per cycle - 2.157987463 seconds time elapsed + 5,151,677,249 cycles # 3.338 GHz + 11,509,930,615 instructions # 2.23 insn per cycle + 1.543538979 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.949125e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.512113e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.512113e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.953091 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.929625e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.588534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.588534e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.503019 sec INFO: No Floating Point Exceptions have been reported - 5,277,608,562 cycles # 2.695 GHz - 10,845,633,589 instructions # 2.06 insn per cycle - 1.960046746 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) + 5,031,273,010 cycles # 3.339 GHz + 10,912,509,093 instructions # 2.17 insn per cycle + 1.506915320 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2125) (512y: 124) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.545325e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.736253e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.736253e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.149111 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.182703e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.868825e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.868825e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.462204 sec INFO: No Floating Point Exceptions have been reported - 5,725,568,726 cycles # 1.815 GHz - 8,037,864,149 instructions # 1.40 insn per cycle - 3.156036160 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) + 4,894,621,540 cycles # 3.339 GHz + 7,911,118,728 instructions # 1.62 insn per cycle + 1.466076132 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1540) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 66fdf9efe4..7960bd4fc7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:11:59 +DATE: 2024-05-16_15:16:49 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.582142e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.158915e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.274993e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.629579 sec -INFO: No Floating Point Exceptions have been reported - 2,438,671,292 cycles # 2.828 GHz - 3,557,518,240 instructions # 1.46 insn per cycle - 0.918692112 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.063642e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.124319e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 2.124319e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.792705e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.867470e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.867470e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.250994 sec +TOTAL : 3.882179 sec INFO: No Floating Point Exceptions have been reported - 15,177,224,624 cycles # 2.888 GHz - 38,389,589,114 instructions # 2.53 insn per cycle - 5.256694767 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 13,590,452,132 cycles # 3.499 GHz + 38,381,415,159 instructions # 2.82 insn per cycle + 3.884999642 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 674) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.469246e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.659787e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.659787e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.818661e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.062840e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.062840e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.192043 sec +TOTAL : 2.302066 sec INFO: No Floating Point Exceptions have been reported - 9,234,869,625 cycles # 2.889 GHz - 24,577,322,685 instructions # 2.66 insn per cycle - 3.197667860 seconds time elapsed + 8,054,845,661 cycles # 3.495 GHz + 24,571,174,273 instructions # 3.05 insn per cycle + 2.304828748 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.520662e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.999169e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.999169e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.811961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.456700e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.456700e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.069942 sec +TOTAL : 1.470660 sec INFO: No Floating Point Exceptions have been reported - 5,642,462,557 cycles # 2.720 GHz - 11,233,692,701 instructions # 1.99 insn per cycle - 2.075542898 seconds time elapsed + 4,906,328,782 cycles # 3.330 GHz + 11,227,802,708 instructions # 2.29 insn per cycle + 1.473487930 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.151383e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.740134e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.740134e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.128991e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.819245e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 8.819245e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.875924 sec +TOTAL : 1.414626 sec INFO: No Floating Point Exceptions have been reported - 5,122,190,825 cycles # 2.724 GHz - 10,508,387,782 instructions # 2.05 insn per cycle - 1.881606947 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) + 4,719,736,167 cycles # 3.330 GHz + 10,630,404,841 instructions # 2.25 insn per cycle + 1.417503853 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2125) (512y: 124) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= 
+runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.617306e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.815381e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.815381e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.329901e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.039190e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.039190e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.070946 sec +TOTAL : 1.381867 sec INFO: No Floating Point Exceptions have been reported - 5,582,158,144 cycles # 1.816 GHz - 7,742,870,902 instructions # 1.39 insn per cycle - 3.076599052 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) + 4,609,836,466 cycles # 3.330 GHz + 7,671,855,782 instructions # 1.66 insn per cycle + 1.384694427 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1540) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index 82194f6fe3..8060d197b3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -1,214 +1,128 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:09:12 +DATE: 2024-05-16_15:16:07 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.587611e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.161872e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.276844e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.568860 sec -INFO: No Floating Point Exceptions have been reported - 2,269,706,021 cycles # 2.822 GHz - 3,484,022,632 instructions # 1.54 insn per cycle - 0.860923648 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.045564e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.105865e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.105865e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.235402 sec -INFO: No Floating Point Exceptions have been reported - 14,997,948,844 cycles # 2.862 GHz - 38,373,416,469 instructions 
# 2.56 insn per cycle - 5.240872993 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe: Aborted + 3,905,079 cycles # 2.680 GHz + 6,285,573 instructions # 1.61 insn per cycle + 0.039944283 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 674) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -INFO: 
The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.444491e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.632712e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.632712e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.153505 sec -INFO: No Floating Point Exceptions have been reported - 9,049,779,346 cycles # 2.866 GHz - 24,577,971,625 instructions # 2.72 insn per cycle - 3.158944927 seconds time elapsed +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe: Aborted + 3,881,664 cycles # 2.680 GHz + 6,304,668 instructions # 1.62 insn per cycle + 0.038823519 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.437117e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.904229e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.904229e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.038232 sec -INFO: No Floating Point Exceptions have been reported - 5,473,582,641 cycles # 2.680 GHz - 11,251,858,191 instructions # 2.06 insn per cycle - 2.043714380 seconds time elapsed +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe: Aborted + 4,012,821 cycles # 3.252 GHz + 6,293,421 instructions # 1.57 insn per cycle + 0.038405470 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.066218e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.650713e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.650713e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.837947 sec -INFO: No Floating Point Exceptions have been reported - 4,942,309,563 cycles # 2.682 GHz - 10,557,200,123 instructions # 2.14 insn per cycle - 1.844865568 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe: Aborted + 4,075,981 cycles # 3.242 GHz + 6,297,027 instructions # 1.54 insn per cycle + 0.038295092 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2125) (512y: 124) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.598977e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.794496e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.794496e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.023039 sec -INFO: No Floating Point Exceptions have been reported - 5,367,715,100 cycles # 1.773 GHz - 7,793,769,749 instructions # 1.45 insn per cycle - 3.028517366 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe: Aborted + 3,966,761 cycles # 3.256 GHz + 6,291,754 instructions # 1.59 insn per cycle + 0.038747033 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1540) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 3db0a99453..bc2e3a2d1d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,216 +1,183 @@ -Building in 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:06:28 +DATE: 2024-05-16_15:15:13 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.591450e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156507e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.275190e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.728834 sec -INFO: No Floating Point Exceptions have been reported - 2,711,621,820 cycles # 2.826 GHz - 4,288,575,941 instructions # 1.58 insn per cycle - 1.017933550 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.045774e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.105988e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.105988e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.234926 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.810618e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.886696e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.886696e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.858525 sec INFO: No Floating Point Exceptions have been reported - 14,996,539,700 cycles # 2.862 GHz - 38,373,492,139 instructions # 2.56 insn per cycle - 5.240540958 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 13,499,659,763 cycles # 3.496 GHz + 38,381,595,588 instructions # 2.84 insn per cycle + 3.861871026 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 674) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.431303e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.617753e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.617753e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.165641 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.842479e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.089616e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.089616e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.292785 sec INFO: No Floating Point Exceptions have been reported - 9,072,261,960 cycles # 2.862 GHz - 24,578,342,604 instructions # 2.71 insn per cycle - 3.171145800 seconds time elapsed + 8,029,889,332 cycles # 3.498 GHz + 24,570,970,084 instructions # 3.06 insn per cycle + 2.295676801 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.460196e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.936686e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.936686e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.030403 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.854323e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.508345e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.508345e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.463664 sec INFO: No Floating Point Exceptions have been reported - 5,452,336,471 cycles # 2.679 GHz - 11,251,160,510 instructions # 2.06 insn per cycle - 2.035938093 seconds time elapsed + 4,881,173,354 cycles # 3.329 GHz + 11,227,429,391 instructions # 2.30 insn per cycle + 1.466667703 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.063893e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.649981e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.649981e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.838982 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.108854e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.793319e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.793319e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.416234 sec INFO: No Floating Point Exceptions have been reported - 4,938,631,038 cycles # 2.680 GHz - 10,556,930,414 instructions # 2.14 insn per cycle - 1.844618889 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) + 4,724,507,539 cycles # 3.330 GHz + 10,632,467,883 instructions # 2.25 insn per cycle + 1.419048077 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2125) (512y: 124) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.589787e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.785615e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.785615e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.030446 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.341696e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.055467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.055467e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.380333 sec INFO: No Floating Point Exceptions have been reported - 5,385,276,295 cycles # 1.774 GHz - 7,793,583,016 instructions # 1.45 insn per cycle - 3.036161028 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) + 4,605,178,871 cycles # 3.330 GHz + 7,671,819,434 instructions # 1.67 insn per cycle + 1.383249425 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1540) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 0caf1293cf..96b6cb5ddc 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:35:28 +DATE: 2024-05-16_14:56:06 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.206695e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.183658e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279171e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.521961 sec -INFO: No Floating Point Exceptions have been reported - 2,148,802,757 cycles # 2.845 GHz - 3,054,152,486 instructions # 1.42 insn per cycle - 0.812117976 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.068168e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.129039e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.129039e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.177399 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.723912e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.795203e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.795203e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.977781 sec INFO: No Floating Point Exceptions have been reported - 15,011,872,798 cycles # 2.897 GHz - 40,100,761,049 instructions # 2.67 insn per cycle - 5.182501125 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) + 13,919,226,237 cycles # 3.497 GHz + 40,192,452,967 instructions # 2.89 insn per cycle + 3.980522127 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 687) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.634343e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.844834e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.844834e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.993727 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.103438e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.377235e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.377235e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.180803 sec INFO: No Floating Point Exceptions have been reported - 8,671,029,072 cycles # 2.892 GHz - 23,670,969,931 instructions # 2.73 insn per cycle - 2.999072752 seconds time elapsed + 7,633,753,795 cycles # 3.496 GHz + 23,664,117,605 instructions # 3.10 insn per cycle + 2.183625692 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2072) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.945254e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.323667e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.323667e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.228700 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.032470e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.549227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.549227e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.619630 sec INFO: No Floating Point Exceptions have been reported - 6,081,438,462 cycles # 2.724 GHz - 13,061,002,322 instructions # 2.15 insn per cycle - 2.233958089 seconds time elapsed + 5,396,970,498 cycles # 3.327 GHz + 13,039,165,073 instructions # 2.42 insn per cycle + 1.622514788 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.205594e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.622405e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.622405e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.121856 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.285369e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.833132e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.833132e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.562859 sec INFO: No Floating Point Exceptions have been reported - 5,798,891,312 cycles # 2.727 GHz - 12,319,969,769 instructions # 2.12 insn per cycle - 2.127030294 seconds time elapsed + 5,207,784,461 cycles # 3.326 GHz + 12,264,767,613 instructions # 2.36 insn per cycle + 1.566117430 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2093) (512y: 294) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 
2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.380432e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.550251e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.550251e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.209519 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.933129e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.586264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.586264e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.445504 sec INFO: No Floating Point Exceptions have been reported - 5,821,355,640 cycles # 1.812 GHz - 9,603,981,726 instructions # 1.65 insn per cycle - 3.214724733 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1971) + 4,806,228,356 cycles # 3.319 GHz + 9,536,324,378 instructions # 1.98 insn per cycle + 1.448384054 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1520) (512y: 199) (512z: 1970) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 6af05ea7e1..dddeeae07d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:51:55 +DATE: 2024-05-16_15:07:26 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.681198e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166116e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.276872e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.529611 sec -INFO: No Floating Point Exceptions have been reported - 2,190,477,637 cycles # 2.832 GHz - 3,135,955,530 instructions # 1.43 insn per cycle - 0.830299558 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.383572e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.466296e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.466296e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.507918 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.198908e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.297709e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.297709e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.405685 sec INFO: No Floating Point Exceptions have been reported - 13,013,442,526 cycles # 2.884 GHz - 34,387,029,075 instructions # 2.64 insn per cycle - 4.513459426 seconds time elapsed + 11,921,831,379 cycles # 3.498 GHz + 34,393,959,805 instructions # 2.88 insn per cycle + 3.408627699 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.946707e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.083881e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.083881e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.667816 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.725883e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.870404e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.870404e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.940621 sec INFO: No Floating Point Exceptions have been reported - 10,591,846,077 cycles # 2.884 GHz - 24,007,245,790 instructions # 2.27 insn per cycle - 3.673406920 seconds time elapsed + 10,298,753,385 cycles # 3.499 GHz + 24,000,547,193 instructions # 2.33 insn per cycle + 2.943559898 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 
2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.532632e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.849376e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.849376e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.423240 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.008721e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.379316e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.379316e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.872348 sec INFO: No Floating Point Exceptions have been reported - 6,577,855,979 cycles # 2.709 GHz - 12,401,365,684 instructions # 1.89 insn per cycle - 2.428791768 seconds time elapsed + 6,230,740,888 cycles # 3.323 GHz + 12,378,068,788 instructions # 1.99 insn per cycle + 1.875290821 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3154) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516200 Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.754457e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.104775e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.104775e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.314834 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.541455e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.984945e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.984945e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.728378 sec INFO: No Floating Point Exceptions have been reported - 6,233,998,487 cycles # 2.688 GHz - 11,576,068,199 instructions # 1.86 insn per cycle - 2.320534715 seconds time elapsed + 5,754,725,980 cycles # 3.325 GHz + 11,517,937,425 instructions # 2.00 insn per cycle + 1.731372509 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516200 Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 
2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.687851e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.893233e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.893233e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.952132 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.969262e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.630621e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.630621e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.439347 sec INFO: No Floating Point Exceptions have been reported - 5,323,772,693 cycles # 1.802 GHz - 9,296,912,008 instructions # 1.75 insn per cycle - 2.957828928 seconds time elapsed + 4,794,037,689 cycles # 3.325 GHz + 9,235,777,391 instructions # 1.93 insn per cycle + 1.442310458 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2115) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 2040ec21eb..848c11f2a0 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:52:19 +DATE: 2024-05-16_15:07:38 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.680230e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168644e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.280417e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.532171 sec -INFO: No Floating Point Exceptions have been reported - 2,169,507,018 cycles # 2.828 GHz - 3,115,355,964 instructions # 1.44 insn per cycle - 0.826043020 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.524819e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.617052e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.617052e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.262483 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.388100e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.498766e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.498766e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.223593 sec INFO: No Floating Point Exceptions have been reported - 12,358,560,610 cycles # 2.896 GHz - 35,037,446,637 instructions # 2.84 insn per cycle - 4.268207887 seconds time elapsed + 11,284,349,168 cycles # 3.498 GHz + 35,046,689,761 instructions # 3.11 insn per cycle + 3.226554607 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.908483e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.040450e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.040450e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.714757 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.701373e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.844016e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.844016e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.958949 sec INFO: No Floating Point Exceptions have been reported - 10,745,562,014 cycles # 2.889 GHz - 23,084,374,218 instructions # 2.15 insn per cycle - 3.720383315 seconds time elapsed + 10,361,253,789 cycles # 3.499 GHz + 23,077,337,760 instructions # 2.23 insn per cycle + 2.961886589 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 
2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.878271e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.246530e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.246530e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.258864 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.561160e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.005948e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.005948e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.725333 sec INFO: No Floating Point Exceptions have been reported - 6,151,591,588 cycles # 2.717 GHz - 11,956,808,073 instructions # 1.94 insn per cycle - 2.264473200 seconds time elapsed + 5,745,944,141 cycles # 3.325 GHz + 11,933,253,629 instructions # 2.08 insn per cycle + 1.728282440 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2509) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.958079e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.345089e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.345089e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.224234 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.836858e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.318369e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.318369e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.657487 sec INFO: No Floating Point Exceptions have been reported - 6,017,653,055 cycles # 2.700 GHz - 11,128,128,624 instructions # 1.85 insn per cycle - 2.229785356 seconds time elapsed + 5,521,172,266 cycles # 3.326 GHz + 11,071,234,458 instructions # 2.01 insn per cycle + 1.660417251 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2126) (512y: 174) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 
2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.739650e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.951827e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.951827e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.913360 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.306028e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.015563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.015563e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.385953 sec INFO: No Floating Point Exceptions have been reported - 5,212,798,448 cycles # 1.786 GHz - 9,020,884,070 instructions # 1.73 insn per cycle - 2.919040069 seconds time elapsed + 4,617,200,796 cycles # 3.325 GHz + 8,960,232,192 instructions # 1.94 insn per cycle + 1.388841142 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1650) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 93f412dad4..1a23ff636c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:35:52 +DATE: 2024-05-16_14:56:18 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.088595e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.705968e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.969781e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.482195 sec -INFO: No Floating Point Exceptions have been reported - 2,007,920,858 cycles # 2.849 GHz - 2,840,933,430 instructions # 1.41 insn per cycle - 0.763422225 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.200574e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.271569e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.271569e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.850875 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.051088e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.152005e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.152005e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.552408 sec INFO: No Floating Point Exceptions have been reported - 14,073,569,281 cycles # 2.899 GHz - 38,343,239,881 instructions # 2.72 insn per cycle - 4.855897587 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 12,424,102,263 cycles # 3.496 GHz + 38,259,948,535 instructions # 3.08 insn per cycle + 3.554812663 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.925449e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.332953e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.332953e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.217076 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.542346e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.036288e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.036288e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.717374 sec INFO: No Floating Point Exceptions have been reported - 6,436,588,824 cycles # 2.899 GHz - 15,815,821,412 instructions # 2.46 insn per cycle - 2.222049918 seconds time elapsed + 6,013,460,590 cycles # 3.497 GHz + 15,822,230,782 instructions # 2.63 insn per cycle + 1.719941893 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.963004e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.029520e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.029520e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.256665 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.245149e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.415618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.415618e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 0.951492 sec INFO: No Floating Point Exceptions have been reported - 3,455,760,948 cycles # 2.740 GHz - 7,593,976,565 instructions # 2.20 insn per cycle - 1.261861875 seconds time elapsed + 3,185,224,100 cycles # 3.340 GHz + 7,575,013,217 instructions # 2.38 insn per cycle + 0.954091020 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.569986e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.110539e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.110539e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.182427 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.281797e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.465090e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.465090e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 0.929151 sec INFO: No Floating Point Exceptions have been reported - 3,244,770,474 cycles # 2.734 GHz - 7,203,559,407 instructions # 2.22 insn per cycle - 1.187623854 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) + 3,111,746,449 cycles # 3.341 GHz + 7,223,059,934 instructions # 2.32 insn per cycle + 0.931750183 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2904) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.864494e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.605662e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.605662e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.614546 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.443829e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.680917e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.680917e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 0.836985 sec INFO: No Floating Point Exceptions have been reported - 3,050,749,421 cycles # 1.885 GHz - 5,835,755,685 instructions # 1.91 insn per cycle - 1.619564037 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) + 2,808,031,651 cycles # 3.346 GHz + 5,761,002,673 instructions # 2.05 insn per cycle + 0.839577856 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2371) (512y: 0) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 426db838d7..06dd9166f9 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,227 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:01:19 +DATE: 2024-05-16_15:12:53 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.801236e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.462846e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.462846e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.684862 sec -INFO: No Floating Point Exceptions have been reported - 2,586,573,508 cycles # 2.828 GHz - 4,016,406,941 instructions # 1.55 insn per cycle - 0.971565490 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.176436e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.247449e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.247449e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.948763 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.049014e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.149704e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.149704e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.575558 sec INFO: No Floating Point Exceptions have been reported - 14,176,104,430 cycles # 2.862 GHz - 38,383,843,895 instructions # 2.71 insn per cycle - 4.955194603 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 12,521,485,093 cycles # 3.499 GHz + 38,305,059,348 instructions # 3.06 insn per cycle + 3.579105569 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.809798e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.200764e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.200764e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.315849 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.491526e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.976028e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.976028e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.760872 sec INFO: No Floating Point Exceptions have been reported - 6,633,418,276 cycles # 2.858 GHz - 16,095,968,093 instructions # 2.43 insn per cycle - 2.322298973 seconds time elapsed + 6,169,634,576 cycles # 3.497 GHz + 16,102,875,657 instructions # 2.61 insn per cycle + 1.764343930 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.679036e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.925640e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.925640e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.342355 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.226753e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.391987e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.391987e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 0.997271 sec INFO: No Floating Point Exceptions have been reported - 3,640,592,514 cycles # 2.701 GHz - 7,831,268,120 instructions # 2.15 insn per cycle - 1.348786146 seconds time elapsed + 3,349,146,328 cycles # 3.348 GHz + 7,812,668,287 instructions # 2.33 insn per cycle + 1.000767699 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.163700e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.056629e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.056629e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.278871 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.260335e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.433565e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.433565e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 0.976826 sec INFO: No Floating Point Exceptions have been reported - 3,437,646,895 cycles # 2.676 GHz - 7,439,842,858 instructions # 2.16 insn per cycle - 1.285386542 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) + 3,282,049,179 cycles # 3.348 GHz + 7,462,210,417 instructions # 2.27 insn per cycle + 0.980420198 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2904) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.597215e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.292791e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.292791e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.727252 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.408011e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.634578e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.634578e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 0.890705 sec INFO: No Floating Point Exceptions have been reported - 3,258,697,081 cycles # 1.881 GHz - 6,089,840,836 instructions # 1.87 insn per cycle - 1.733818978 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) + 2,994,251,807 cycles # 3.349 GHz + 6,016,187,326 instructions # 2.01 insn per cycle + 0.894265671 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2371) (512y: 0) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 884891874e..39a743ab58 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:12:23 +DATE: 2024-05-16_15:17:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.468958e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.648278e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.971571e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.575797 sec -INFO: No Floating Point Exceptions have been reported - 2,271,357,910 cycles # 2.845 GHz - 3,342,640,625 instructions # 1.47 insn per cycle - 0.855647595 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.198151e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.269622e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 2.269622e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.053107e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.154324e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.154324e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.914840 sec +TOTAL : 3.550675 sec INFO: No Floating Point Exceptions have been reported - 14,211,276,974 cycles # 2.889 GHz - 38,370,210,397 instructions # 2.70 insn per cycle - 4.920108721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 12,430,557,442 cycles # 3.499 GHz + 38,261,252,207 instructions # 3.08 insn per cycle + 3.553342568 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.892733e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.301573e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.301573e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.544926e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.039506e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 7.039506e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.288479 sec +TOTAL : 1.716095 sec INFO: No Floating Point Exceptions have been reported - 6,608,042,838 cycles # 2.882 GHz - 15,829,158,403 instructions # 2.40 insn per cycle - 2.293691008 seconds time elapsed + 6,010,423,272 cycles # 3.498 GHz + 15,822,165,662 instructions # 2.63 insn per cycle + 1.718690630 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.919042e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.023820e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.023820e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.247900e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.416525e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.416525e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.319201 sec +TOTAL : 0.949440 sec INFO: No Floating Point Exceptions have been reported - 3,618,631,378 cycles # 2.734 GHz - 7,578,247,859 instructions # 2.09 insn per cycle - 1.324366743 seconds time elapsed + 3,178,973,029 cycles # 3.340 GHz + 7,575,031,563 instructions # 2.38 insn per cycle + 0.952033981 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.492699e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.100151e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.100151e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.283370e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.462893e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.462893e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.250231 sec +TOTAL : 0.928789 sec INFO: No Floating Point Exceptions have been reported - 3,418,366,623 cycles # 2.724 GHz - 7,152,275,486 instructions # 2.09 insn per cycle - 1.255758340 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) + 3,108,736,160 cycles # 3.339 GHz + 7,224,185,601 instructions # 2.32 insn per cycle + 0.931414201 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2904) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.830732e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.562097e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.562097e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.437994e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.673352e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.673352e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.681529 sec +TOTAL : 0.840102 sec INFO: No Floating Point Exceptions have been reported - 3,218,452,038 cycles # 1.909 GHz - 5,786,270,960 instructions # 1.80 insn per cycle - 1.686847993 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) + 2,814,712,978 cycles # 3.341 GHz + 5,761,169,749 instructions # 2.05 insn per cycle + 0.842697262 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2371) (512y: 0) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index 9b5852a8c1..cc474d6ce9 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -1,214 +1,128 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:09:35 +DATE: 2024-05-16_15:16:08 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.497286e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.653761e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.976765e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.520499 sec -INFO: No Floating Point Exceptions have been reported - 2,122,949,824 cycles # 2.819 GHz - 3,308,605,661 instructions # 1.56 insn per cycle - 0.811337951 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.187282e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.258952e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.258952e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.881435 sec -INFO: No Floating Point Exceptions have been reported - 13,993,887,356 cycles # 2.864 GHz - 38,340,879,445 instructions # 
2.74 insn per cycle - 4.886765699 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe: Aborted + 4,114,159 cycles # 3.246 GHz + 6,325,343 instructions # 1.54 insn per cycle + 0.038529387 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -INFO: 
The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.866184e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.266559e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.266559e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.243695 sec -INFO: No Floating Point Exceptions have been reported - 6,437,628,216 cycles # 2.863 GHz - 15,815,570,005 instructions # 2.46 insn per cycle - 2.248941783 seconds time elapsed +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe: Aborted + 4,007,195 cycles # 3.247 GHz + 6,305,244 instructions # 1.57 insn per cycle + 0.037795572 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.699018e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.949673e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.949673e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.293092 sec -INFO: No Floating Point Exceptions have been reported - 3,447,035,685 cycles # 2.657 GHz - 7,594,377,345 instructions # 2.20 insn per cycle - 1.298317015 seconds time elapsed +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe 
-p 2048 256 2 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe: Aborted + 3,978,139 cycles # 3.240 GHz + 6,305,133 instructions # 1.58 insn per cycle + 0.038545257 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP 
precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.410196e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.089229e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.089229e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.202332 sec -INFO: No Floating Point Exceptions have been reported - 3,248,094,322 cycles # 2.691 GHz - 7,201,883,054 instructions # 2.22 insn per cycle - 1.207739630 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe: Aborted + 4,051,552 cycles # 3.250 GHz + 6,325,721 instructions # 1.56 insn per cycle + 0.037990069 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2904) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.682713e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.392370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.392370e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.657517 sec -INFO: No Floating Point Exceptions have been reported - 3,060,341,406 cycles # 1.842 GHz - 5,836,262,166 instructions # 1.91 insn per cycle - 1.662864711 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe: Aborted + 4,166,692 cycles # 2.635 GHz + 6,311,223 instructions # 1.51 insn per cycle + 0.038878370 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2371) (512y: 0) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 7e3b1fa48e..2598a17a4b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,216 +1,183 @@ -Building in 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:06:52 +DATE: 2024-05-16_15:15:25 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.502594e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.623050e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.943883e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.628666 sec -INFO: No Floating Point Exceptions have been reported - 2,403,264,425 cycles # 2.820 GHz - 3,734,811,294 instructions # 1.55 insn per cycle - 0.909767197 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.185686e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.257300e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.257300e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.884819 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.055035e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.156447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.156447e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.547705 sec INFO: No Floating Point Exceptions have been reported - 13,995,449,913 cycles # 2.863 GHz - 38,340,978,131 instructions # 2.74 insn per cycle - 4.889991891 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 12,420,939,646 cycles # 3.499 GHz + 38,261,315,045 instructions # 3.08 insn per cycle + 3.550403868 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.864053e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.263128e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.263128e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.244540 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.543960e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.036828e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.036828e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.716108 sec INFO: No Floating Point Exceptions have been reported - 6,436,419,349 cycles # 2.862 GHz - 15,815,556,279 instructions # 2.46 insn per cycle - 2.249779623 seconds time elapsed + 6,012,121,369 cycles # 3.498 GHz + 15,822,170,874 instructions # 2.63 insn per cycle + 1.718814295 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.799961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.008748e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.008748e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.279634 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.245919e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.415937e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.415937e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 0.951520 sec INFO: No Floating Point Exceptions have been reported - 3,447,592,643 cycles # 2.685 GHz - 7,593,708,789 instructions # 2.20 insn per cycle - 1.284877623 seconds time elapsed + 3,185,388,754 cycles # 3.340 GHz + 7,574,845,638 instructions # 2.38 insn per cycle + 0.954103825 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.434984e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092289e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092289e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.198937 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.288057e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.468111e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.468111e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 0.925986 sec INFO: No Floating Point Exceptions have been reported - 3,242,375,801 cycles # 2.694 GHz - 7,202,509,960 instructions # 2.22 insn per cycle - 1.204245270 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) + 3,101,678,421 cycles # 3.341 GHz + 7,222,956,816 instructions # 2.33 insn per cycle + 0.928709358 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2904) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.713311e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.432943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.432943e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.650810 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.438320e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.674891e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.674891e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 0.839988 sec INFO: No Floating Point Exceptions have been reported - 3,050,285,995 cycles # 1.842 GHz - 5,834,789,164 instructions # 1.91 insn per cycle - 1.656446986 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) + 2,816,211,672 cycles # 3.343 GHz + 5,760,731,099 instructions # 2.05 insn per cycle + 0.842654266 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2371) (512y: 0) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 3e123e6fd7..abb5b0cb23 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:36:11 +DATE: 2024-05-16_14:56:27 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.096553e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.763289e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.037690e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.480283 sec -INFO: No Floating Point Exceptions have been reported - 2,036,711,218 cycles # 2.852 GHz - 2,918,453,967 instructions # 1.43 insn per cycle - 0.771336406 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.166079e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.236793e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.236793e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.926463 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.116597e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.221945e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.221945e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.480996 sec INFO: No Floating Point Exceptions have been reported - 14,320,299,267 cycles # 2.905 GHz - 39,836,243,439 instructions # 2.78 insn per cycle - 4.931482509 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) + 12,173,964,605 cycles # 3.496 GHz + 39,802,438,725 instructions # 3.27 insn per cycle + 3.483478182 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 580) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199028000236 Relative difference = 4.790961076489297e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.723514e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.285593e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.285593e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.919156 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.116195e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.907393e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.907393e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.406100 sec INFO: No Floating Point Exceptions have been reported - 5,582,245,803 cycles # 2.902 GHz - 15,285,424,302 instructions # 2.74 insn per cycle - 1.924109376 seconds time elapsed + 4,925,094,494 cycles # 3.497 GHz + 15,291,998,664 instructions # 3.10 insn per cycle + 1.408797942 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2474) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.349024e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.991002e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.991002e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.737963 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.102214e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.978269e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.978269e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.261979 sec INFO: No Floating Point Exceptions have been reported - 4,749,494,972 cycles # 2.726 GHz - 9,735,095,064 instructions # 2.05 insn per cycle - 1.742978161 seconds time elapsed + 4,207,491,304 cycles # 3.328 GHz + 9,715,468,356 instructions # 2.31 insn per cycle + 1.264618453 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3708) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182108197361 Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.536931e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.219273e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.219273e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.690263 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.226890e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.012042e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.012042e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.247778 sec INFO: No Floating Point Exceptions have been reported - 4,623,322,631 cycles # 2.728 GHz - 9,325,575,279 instructions # 2.02 insn per cycle - 1.695318457 seconds time elapsed + 4,163,957,666 cycles # 3.331 GHz + 9,272,726,706 instructions # 2.23 insn per cycle + 1.250391290 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3496) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182108197361 Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.572579e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.052133e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.052133e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.968034 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.175307e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.329979e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329979e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.002712 sec INFO: No Floating Point Exceptions have been reported - 3,660,831,684 cycles # 1.856 GHz - 7,034,974,988 instructions # 1.92 insn per cycle - 1.973212700 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2610) (512y: 12) (512z: 2220) + 3,349,707,803 cycles # 3.333 GHz + 6,969,691,373 instructions # 2.08 insn per cycle + 1.005329902 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2611) (512y: 0) (512z: 2220) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183459779248 Relative difference = 1.7053177021099307e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index c7eded0fc2..a2a4f8bd4a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:52:42 +DATE: 2024-05-16_15:07:51 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.456356e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.657836e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.983561e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.487671 sec -INFO: No Floating Point Exceptions have been reported - 2,030,099,363 cycles # 2.844 GHz - 2,856,891,631 instructions # 1.41 insn per cycle - 0.771313393 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.397227e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.481743e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.481743e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.460576 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.274775e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.391142e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.391142e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.317724 sec INFO: No Floating Point Exceptions have been reported - 12,588,647,411 cycles # 2.819 GHz - 34,372,288,545 instructions # 2.73 insn per cycle - 4.465853868 seconds time elapsed + 11,614,161,745 cycles # 3.498 GHz + 34,394,408,726 instructions # 2.96 insn per cycle + 3.320490594 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.225217e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.687950e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.687950e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.095644 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.102905e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.687439e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.687439e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.590451 sec INFO: No Floating Point Exceptions have been reported - 6,085,238,066 cycles # 2.897 GHz - 14,860,574,019 instructions # 2.44 insn per cycle - 2.101017455 seconds time elapsed + 5,571,972,185 cycles # 3.498 GHz + 14,867,156,586 instructions # 2.67 insn per cycle + 1.593201332 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193803280592 Relative difference = 1.8746278463897685e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.969640e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.750011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.750011e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.592133 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.246464e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.014393e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.014393e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.243841 sec INFO: No Floating Point Exceptions have been reported - 4,316,607,801 cycles # 2.703 GHz - 9,028,975,402 instructions # 2.09 insn per cycle - 1.597664902 seconds time elapsed + 4,150,312,815 cycles # 3.330 GHz + 9,009,830,893 instructions # 2.17 insn per cycle + 1.246594471 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4443) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181999931112 Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.187100e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.023996e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.023996e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.545422 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.750139e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.075227e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.075227e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.187335 sec INFO: No Floating Point Exceptions have been reported - 4,204,195,380 cycles # 2.712 GHz - 8,663,569,400 instructions # 2.06 insn per cycle - 1.550927334 seconds time elapsed + 3,964,482,809 cycles # 3.332 GHz + 8,611,789,877 instructions # 2.17 insn per cycle + 1.190075774 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4243) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181999931112 Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 
2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.251438e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.680453e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.680453e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.083936 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.076286e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204393e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204393e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.085926 sec INFO: No Floating Point Exceptions have been reported - 3,833,998,104 cycles # 1.836 GHz - 7,808,361,622 instructions # 2.04 insn per cycle - 2.089489123 seconds time elapsed + 3,624,515,529 cycles # 3.331 GHz + 7,753,366,929 instructions # 2.14 insn per cycle + 1.088713881 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4424) (512y: 0) (512z: 2555) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183246739209 Relative difference = 1.6003107281264138e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index aad34f68a4..c17343ca4e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:53:02 +DATE: 2024-05-16_15:08:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.520611e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.721194e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.056652e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.485221 sec -INFO: No Floating Point Exceptions have been reported - 2,023,639,378 cycles # 2.841 GHz - 2,891,046,466 instructions # 1.43 insn per cycle - 0.769493206 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.614708e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.719370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.719370e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.097789 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.588851e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.729160e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.729160e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.036980 sec INFO: No Floating Point Exceptions have been reported - 11,755,034,517 cycles # 2.866 GHz - 35,108,588,793 instructions # 2.99 insn per cycle - 4.103114971 seconds time elapsed + 10,627,447,348 cycles # 3.497 GHz + 35,130,770,567 instructions # 3.31 insn per cycle + 3.039654652 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.332294e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.809853e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.809853e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.053683 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.183811e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.782224e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.782224e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.574332 sec INFO: No Floating Point Exceptions have been reported - 5,951,415,517 cycles # 2.891 GHz - 14,470,123,335 instructions # 2.43 insn per cycle - 2.059025817 seconds time elapsed + 5,512,574,555 cycles # 3.496 GHz + 14,476,901,478 instructions # 2.63 insn per cycle + 1.577023405 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193583255634 Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.326940e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.191185e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.191185e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.518155 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.001680e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.107747e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.107747e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.156263 sec INFO: No Floating Point Exceptions have been reported - 4,152,217,913 cycles # 2.727 GHz - 8,874,854,960 instructions # 2.14 insn per cycle - 1.523530355 seconds time elapsed + 3,862,044,679 cycles # 3.333 GHz + 8,855,760,137 instructions # 2.29 insn per cycle + 1.159048658 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3574) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182107033208 Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.326335e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.192412e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.192412e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.518142 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.511281e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.045917e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.045917e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.213783 sec INFO: No Floating Point Exceptions have been reported - 4,138,145,120 cycles # 2.717 GHz - 8,411,511,000 instructions # 2.03 insn per cycle - 1.523559219 seconds time elapsed + 4,051,985,329 cycles # 3.332 GHz + 8,359,035,113 instructions # 2.06 insn per cycle + 1.216495377 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3319) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182107033208 Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.337364e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.777859e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.777859e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.053123 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.090474e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.221042e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.221042e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.073137 sec INFO: No Floating Point Exceptions have been reported - 3,784,038,038 cycles # 1.840 GHz - 7,702,433,783 instructions # 2.04 insn per cycle - 2.058532499 seconds time elapsed + 3,583,841,042 cycles # 3.332 GHz + 7,646,282,047 instructions # 2.13 insn per cycle + 1.075899915 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3440) (512y: 0) (512z: 2107) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183204829693 Relative difference = 1.5796536184903122e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index ff88d5da2d..6068583176 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:36:32 +DATE: 2024-05-16_14:56:37 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.198792e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.180605e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.275668e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.521467 sec -INFO: No Floating Point Exceptions have been reported - 2,143,649,339 cycles # 2.843 GHz - 3,098,162,725 instructions # 1.45 insn per cycle - 0.810608393 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.033714e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.092456e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.092456e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.262850 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.746015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.818179e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.818179e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.946181 sec INFO: No Floating Point Exceptions have been reported - 15,278,986,093 cycles # 2.901 GHz - 38,575,389,182 instructions # 2.52 insn per cycle - 5.268064562 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) + 13,806,209,783 cycles # 3.496 GHz + 38,515,589,460 instructions # 2.79 insn per cycle + 3.948957492 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 678) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.527314e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.723139e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.723139e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.080390 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.896530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.148454e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.148454e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.267633 sec INFO: No Floating Point Exceptions have been reported - 8,961,614,258 cycles # 2.906 GHz - 24,226,315,758 instructions # 2.70 insn per cycle - 3.085434765 seconds time elapsed + 7,935,743,710 cycles # 3.496 GHz + 24,216,719,982 instructions # 3.05 insn per cycle + 2.270534850 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.613394e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.100134e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.100134e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.976346 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.984920e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.657115e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.657115e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.441248 sec INFO: No Floating Point Exceptions have been reported - 5,394,338,439 cycles # 2.724 GHz - 11,277,527,499 instructions # 2.09 insn per cycle - 1.981499886 seconds time elapsed + 4,807,387,010 cycles # 3.330 GHz + 11,202,723,427 instructions # 2.33 insn per cycle + 1.444162110 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.276948e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.897611e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.897611e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.778784 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.277957e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.991395e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.991395e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.389888 sec INFO: No Floating Point Exceptions have been reported - 4,855,499,941 cycles # 2.723 GHz - 10,526,571,188 instructions # 2.17 insn per cycle - 1.784170390 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) + 4,638,113,998 cycles # 3.331 GHz + 10,547,744,593 instructions # 2.27 insn per cycle + 1.392738428 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2218) (512y: 128) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.815864e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.036087e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.036087e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.856419 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.526422e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.287794e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.287794e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.353338 sec INFO: No Floating Point Exceptions have been reported - 5,199,981,370 cycles # 1.818 GHz - 7,603,665,117 instructions # 1.46 insn per cycle - 2.861804972 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) + 4,510,300,771 cycles # 3.326 GHz + 7,441,315,374 instructions # 1.65 insn per cycle + 1.356176926 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1626) (512y: 104) (512z: 1606) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 1d76304278..0a2e8b6ea9 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:36:55 +DATE: 2024-05-16_14:56:49 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.208651e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.184994e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.280716e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.522551 sec -INFO: No Floating Point Exceptions have been reported - 2,145,230,616 cycles # 2.840 GHz - 3,093,123,772 instructions # 1.44 insn per cycle - 0.812278354 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.021911e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.079930e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.079930e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.294031 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.687332e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.756728e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.756728e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.029679 sec INFO: No Floating Point Exceptions have been reported - 15,341,153,400 cycles # 2.896 GHz - 40,370,282,827 instructions # 2.63 insn per cycle - 5.299425936 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) + 14,101,428,195 cycles # 3.497 GHz + 40,344,626,403 instructions # 2.86 insn per cycle + 4.032520152 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.710012e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.926494e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.926494e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.935201 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.095887e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.369304e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.369304e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.183673 sec INFO: No Floating Point Exceptions have been reported - 8,515,314,447 cycles # 2.897 GHz - 23,253,613,819 instructions # 2.73 insn per cycle - 2.940392108 seconds time elapsed + 7,645,788,462 cycles # 3.497 GHz + 23,246,893,330 instructions # 3.04 insn per cycle + 2.186554544 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.780066e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.132607e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.132607e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.302191 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.825873e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.312613e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.312613e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.663253 sec INFO: No Floating Point Exceptions have been reported - 6,262,262,467 cycles # 2.715 GHz - 12,962,490,062 instructions # 2.07 insn per cycle - 2.307689771 seconds time elapsed + 5,540,656,190 cycles # 3.326 GHz + 12,888,464,114 instructions # 2.33 insn per cycle + 1.666087029 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.109643e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.511847e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.511847e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.160493 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.204420e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.743180e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.743180e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.579391 sec INFO: No Floating Point Exceptions have been reported - 5,903,466,716 cycles # 2.727 GHz - 12,238,680,442 instructions # 2.07 insn per cycle - 2.165768560 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) + 5,263,221,323 cycles # 3.327 GHz + 12,084,809,163 instructions # 2.30 insn per cycle + 1.582218339 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2210) (512y: 296) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.507940e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.694154e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.694154e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.097205 sec +OMP threads / `nproc 
--all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.184283e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.882775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.882775e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.404252 sec INFO: No Floating Point Exceptions have been reported - 5,614,268,818 cycles # 1.810 GHz - 8,744,074,840 instructions # 1.56 insn per cycle - 3.102417520 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) + 4,678,120,778 cycles # 3.325 GHz + 8,673,946,741 instructions # 1.85 insn per cycle + 1.407067408 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1501) (512y: 173) (512z: 1908) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 1d7490861d..b67eec5b5f 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:37:19 +DATE: 2024-05-16_14:57:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.992211e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.047041e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.061161e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.469322 sec -INFO: No Floating Point Exceptions have been reported - 1,970,950,644 cycles # 2.853 GHz - 2,836,233,202 instructions # 1.44 insn per cycle - 0.747868437 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.129686e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.329949e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.341716e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.607217 sec -INFO: No Floating Point Exceptions have been reported - 2,397,125,482 cycles # 2.825 GHz - 3,658,262,516 instructions # 1.53 insn per cycle - 0.909559944 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.379379e+04 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.391311e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.391311e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.910347 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.217492e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.232488e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.232488e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.118007 sec INFO: No Floating Point Exceptions have been reported - 19,789,020,586 cycles # 2.863 GHz - 59,609,829,111 instructions # 3.01 insn per cycle - 6.914699001 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) + 17,903,778,105 cycles # 3.497 GHz + 59,484,835,491 instructions # 3.32 insn per cycle + 5.119663538 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.619966e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.665049e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.665049e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.569460 sec +OMP threads 
/ `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.227259e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.282790e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.282790e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.655687 sec INFO: No Floating Point Exceptions have been reported - 10,374,266,250 cycles # 2.904 GHz - 30,674,256,165 instructions # 2.96 insn per cycle - 3.573646642 seconds time elapsed + 9,284,800,953 cycles # 3.494 GHz + 30,662,235,138 instructions # 3.30 insn per cycle + 2.657382930 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.120184e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.293257e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.293257e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.820051 sec +OMP threads / 
`nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.305459e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.328152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.328152e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.279118 sec INFO: No Floating Point Exceptions have been reported - 4,901,380,147 cycles # 2.688 GHz - 11,019,047,598 instructions # 2.25 insn per cycle - 1.824311195 seconds time elapsed + 4,231,801,732 cycles # 3.305 GHz + 11,003,256,880 instructions # 2.60 insn per cycle + 1.280874454 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.028182e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.049956e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.049956e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.616748 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.381557e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.407014e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.407014e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.210453 sec INFO: No Floating Point Exceptions have been reported - 4,378,615,331 cycles # 2.702 GHz - 10,296,117,856 instructions # 2.35 insn per cycle - 1.621129053 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) + 4,005,080,732 cycles # 3.305 GHz + 10,337,496,214 instructions # 2.58 insn per cycle + 1.212133030 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4220) (512y: 57) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.954224e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.056280e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.056280e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.381742 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.914830e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.964196e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.964196e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 0.880242 sec INFO: No Floating Point Exceptions have been reported - 4,108,596,097 cycles # 1.723 GHz - 5,842,404,115 instructions # 1.42 insn per cycle - 2.385936782 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) + 2,911,648,623 cycles # 3.303 GHz + 5,792,517,541 instructions # 1.99 insn per cycle + 0.881891759 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1544) (512y: 63) (512z: 3463) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 45a1ef164b..83df88d84d 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,246 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:01:40 +DATE: 2024-05-16_15:13:03 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.535443e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.780857e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.780857e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.503670 sec -INFO: No Floating Point Exceptions have been reported - 2,012,376,201 cycles # 2.812 GHz - 3,006,218,540 instructions # 1.49 insn per cycle - 0.774572160 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.606024e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.624765e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.624765e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.841754 sec -INFO: No Floating Point Exceptions have been reported - 3,099,668,806 cycles # 2.832 GHz - 4,993,276,525 instructions # 1.61 insn per cycle - 1.155254157 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.380068e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.392068e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.392068e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.915910 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.220075e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.235259e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235259e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.118042 sec INFO: No Floating Point Exceptions have been reported - 19,806,579,322 cycles # 2.863 GHz - 59,611,012,266 instructions # 3.01 insn per cycle - 6.920308116 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) + 17,911,933,736 cycles # 3.499 GHz + 59,489,757,567 instructions # 3.32 insn per cycle + 5.119926420 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.550339e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.594733e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.594733e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.631431 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.224279e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.279799e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.279799e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.661489 sec INFO: No Floating Point Exceptions have been reported - 10,404,134,292 cycles # 2.862 GHz - 30,722,305,980 instructions # 2.95 insn per cycle - 3.635916319 seconds time elapsed + 9,312,150,160 cycles # 3.497 GHz + 30,709,998,487 instructions # 3.30 insn per cycle + 2.663374930 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.991824e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.166141e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.166141e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.853778 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.303785e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.326328e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326328e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.285845 sec INFO: No Floating Point Exceptions have been reported - 4,943,570,309 cycles # 2.661 GHz - 11,067,752,215 instructions # 2.24 insn per cycle - 1.858370590 seconds time elapsed + 4,255,534,359 cycles # 3.305 GHz + 11,054,510,411 instructions # 2.60 insn per cycle + 1.287820536 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.005140e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.026682e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.026682e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.662867 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.379267e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.404698e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.404698e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.217248 sec INFO: No Floating Point Exceptions have been reported - 4,426,260,539 cycles # 2.656 GHz - 10,346,882,831 instructions # 2.34 insn per cycle - 1.667431238 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) + 4,029,105,842 cycles # 3.306 GHz + 10,386,401,263 instructions # 2.58 insn per cycle + 1.219158432 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4220) (512y: 57) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.832038e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.932754e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.932754e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.431611 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.912058e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.960855e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.960855e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 0.886228 sec INFO: No Floating Point Exceptions have been reported - 4,145,808,516 cycles # 1.702 GHz - 5,880,428,508 instructions # 1.42 insn per cycle - 2.436095886 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) + 2,935,578,198 cycles # 3.307 GHz + 5,830,336,340 instructions # 1.99 insn per cycle + 0.888122471 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1544) (512y: 63) (512z: 3463) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index c8d4c1d012..54930f8d73 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:37:45 +DATE: 2024-05-16_14:57:14 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.984938e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.044546e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.056865e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.468609 sec -INFO: No Floating Point Exceptions have been reported - 1,981,002,182 cycles # 2.846 GHz - 2,842,945,772 instructions # 1.44 insn per cycle - 0.752497111 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.119070e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.315352e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326681e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.601408 sec -INFO: No Floating Point Exceptions have been reported - 2,383,936,937 cycles # 2.851 GHz - 3,651,729,049 instructions # 1.53 insn per cycle - 0.896728355 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.454763e+04 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.467389e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.467389e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.698212 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.260516e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.276117e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.276117e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.051336 sec INFO: No Floating Point Exceptions have been reported - 19,500,935,732 cycles # 2.911 GHz - 58,799,003,967 instructions # 3.02 insn per cycle - 6.702449206 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) + 17,654,492,955 cycles # 3.494 GHz + 58,797,490,071 instructions # 3.33 insn per cycle + 5.053066480 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1323) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.669930e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.715854e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.715854e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.531511 sec +OMP threads 
/ `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.250080e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.306668e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.306668e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.646454 sec INFO: No Floating Point Exceptions have been reported - 10,228,095,464 cycles # 2.894 GHz - 30,347,180,891 instructions # 2.97 insn per cycle - 3.535798492 seconds time elapsed + 9,254,451,969 cycles # 3.495 GHz + 30,333,424,248 instructions # 3.28 insn per cycle + 2.648175602 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.789972e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.950829e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.950829e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.887432 sec +OMP threads / 
`nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.250886e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.271590e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.271590e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.333796 sec INFO: No Floating Point Exceptions have been reported - 5,055,118,079 cycles # 2.674 GHz - 11,484,444,983 instructions # 2.27 insn per cycle - 1.891612421 seconds time elapsed + 4,412,203,557 cycles # 3.304 GHz + 11,467,908,522 instructions # 2.60 insn per cycle + 1.335544581 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.667837e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.860484e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.860484e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.718788 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.318979e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.342183e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.342183e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.266599 sec INFO: No Floating Point Exceptions have been reported - 4,655,858,880 cycles # 2.704 GHz - 10,842,096,596 instructions # 2.33 insn per cycle - 1.722993406 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) + 4,190,277,758 cycles # 3.305 GHz + 10,842,214,694 instructions # 2.59 insn per cycle + 1.268323708 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4215) (512y: 230) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.981237e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.082937e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.082937e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.372571 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.895707e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.943459e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.943459e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 0.888731 sec INFO: No Floating Point Exceptions have been reported - 4,129,142,877 cycles # 1.738 GHz - 6,106,185,085 instructions # 1.48 insn per cycle - 2.376879303 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) + 2,941,343,592 cycles # 3.304 GHz + 6,060,826,924 instructions # 2.06 insn per cycle + 0.890391590 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1464) (512y: 118) (512z: 3566) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index e4bc7cf2cc..50861b85cf 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:38:10 +DATE: 2024-05-16_14:57:27 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.514552e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.271085e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.366020e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.450662 sec -INFO: No Floating Point Exceptions have been reported - 1,888,418,045 cycles # 2.834 GHz - 2,686,004,303 instructions # 1.42 insn per cycle - 0.722549365 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.424662e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.459806e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.527254e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.495261 sec -INFO: No Floating Point Exceptions have been reported - 2,099,817,827 cycles # 2.862 GHz - 2,990,738,948 instructions # 1.42 insn per cycle - 0.790419941 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412608e+00 -Avg ME (F77/GPU) = 1.4132214346515752 -Relative difference = 0.00043425681546129636 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.505220e+04 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.518346e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.518346e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.562288 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.303784e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.320597e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.320597e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 4.984157 sec INFO: No Floating Point Exceptions have been reported - 19,080,957,547 cycles # 2.906 GHz - 58,959,648,789 instructions # 3.09 insn per cycle - 6.566573323 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) + 17,430,828,591 cycles # 3.497 GHz + 58,906,467,913 instructions # 3.38 insn per cycle + 4.985748040 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412986e+00 Avg ME (F77/C++) = 1.4129858051842916 Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.204155e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.352745e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.352745e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.018056 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.100286e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.119257e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.119257e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724758e+02 +- 2.665339e+02 ) GeV^-2 +TOTAL : 1.512107 sec INFO: No Floating Point Exceptions have been reported - 5,861,245,947 cycles # 2.899 GHz - 16,693,370,121 instructions # 2.85 insn per cycle - 2.022246601 seconds time elapsed + 5,290,980,353 cycles # 3.496 GHz + 16,686,581,131 instructions # 3.15 insn per cycle + 1.513784932 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412987e+00 Avg ME (F77/C++) = 1.4129865669244737 Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.747206e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.811751e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.811751e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.959718 sec +OMP threads / 
`nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.454877e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.549512e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.549512e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 +TOTAL : 0.690038 sec INFO: No Floating Point Exceptions have been reported - 2,597,973,759 cycles # 2.697 GHz - 5,979,816,432 instructions # 2.30 insn per cycle - 0.963957244 seconds time elapsed + 2,286,098,444 cycles # 3.307 GHz + 5,966,861,474 instructions # 2.61 insn per cycle + 0.691674223 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.928786e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.008064e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.008064e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.871454 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.609633e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.709894e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.709894e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 +TOTAL : 0.651107 sec INFO: No Floating Point Exceptions have been reported - 2,346,801,151 cycles # 2.682 GHz - 5,601,970,539 instructions # 2.39 insn per cycle - 0.875813732 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) + 2,157,337,429 cycles # 3.307 GHz + 5,614,584,835 instructions # 2.60 insn per cycle + 0.652720509 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4726) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads 
/ `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.412327e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.455439e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.455439e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.184240 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.927137e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.160943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.160943e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 +TOTAL : 0.440370 sec INFO: No Floating Point Exceptions have been reported - 2,059,493,323 cycles # 1.734 GHz - 3,333,364,881 instructions # 1.62 insn per cycle - 1.188531798 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) + 1,459,851,828 cycles # 3.305 GHz + 3,301,929,201 instructions # 2.26 insn per cycle + 0.441970363 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2157) (512y: 7) (512z: 3674) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index d735dc5897..5daaab0eb2 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,246 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:02:06 +DATE: 2024-05-16_15:13:15 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.750186e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.085490e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.085490e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.469338 sec -INFO: No Floating Point Exceptions have been reported - 1,918,362,944 cycles # 2.804 GHz - 2,834,169,916 instructions # 1.48 insn per cycle - 0.742178075 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.524122e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.570005e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.570005e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.651816 sec -INFO: No Floating Point Exceptions have been reported - 2,503,160,784 cycles # 2.822 GHz - 3,832,792,162 instructions # 1.53 insn per cycle - 0.943470239 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412608e+00 -Avg ME (F77/GPU) = 1.4132214346515752 -Relative difference = 0.00043425681546129636 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.465694e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.479110e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.479110e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.671662 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.307510e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.324633e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.324633e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 4.980490 sec INFO: No Floating Point Exceptions have been reported - 19,108,337,453 cycles # 2.863 GHz - 58,967,331,894 instructions # 3.09 insn per cycle - 6.675976597 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) + 17,424,453,776 cycles # 3.497 GHz + 58,911,014,988 instructions # 3.38 insn per cycle + 4.982343398 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412986e+00 Avg ME (F77/C++) = 1.4129858051842916 Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.093089e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.238027e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.238027e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.051178 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.092074e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.110752e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110752e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724758e+02 +- 2.665339e+02 ) GeV^-2 +TOTAL : 1.526769 sec INFO: No Floating Point Exceptions have been reported - 5,880,119,320 cycles # 2.862 GHz - 16,741,679,626 instructions # 2.85 insn per cycle - 2.055508197 seconds time elapsed + 5,340,998,015 cycles # 3.495 GHz + 16,734,817,199 instructions # 3.13 insn per cycle + 1.528515210 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412987e+00 Avg ME (F77/C++) = 1.4129865669244737 Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.718905e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.782305e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.782305e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.980043 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.451134e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.545641e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.545641e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 +TOTAL : 0.693955 sec INFO: No Floating Point Exceptions have been reported - 2,616,418,693 cycles # 2.660 GHz - 6,017,096,104 instructions # 2.30 insn per cycle - 0.984343134 seconds time elapsed + 2,299,980,015 cycles # 3.307 GHz + 6,004,079,961 instructions # 2.61 insn per cycle + 0.695744167 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.912882e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.991175e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.991175e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.883189 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.603457e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.705544e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.705544e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 +TOTAL : 0.655444 sec INFO: No Floating Point Exceptions have been reported - 2,365,822,002 cycles # 2.667 GHz - 5,638,771,692 instructions # 2.38 insn per cycle - 0.887626463 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) + 2,172,433,478 cycles # 3.307 GHz + 5,651,609,485 instructions # 2.60 insn per cycle + 0.657170212 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4726) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.399129e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.441231e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.441231e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.200076 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.911036e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.142668e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.142668e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 +TOTAL : 0.445137 sec INFO: No Floating Point Exceptions have been reported - 2,081,452,605 cycles # 1.729 GHz - 3,374,965,036 instructions # 1.62 insn per cycle - 1.204429196 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) + 1,476,582,170 cycles # 3.306 GHz + 3,343,481,743 instructions # 2.26 insn per cycle + 0.446859671 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2157) (512y: 7) (512z: 3674) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 3d41e21b12..282ec8ea18 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:38:31 +DATE: 2024-05-16_14:57:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.548366e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.290418e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.382374e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.453301 sec -INFO: No Floating Point Exceptions have been reported - 1,884,361,235 cycles # 2.811 GHz - 2,662,129,036 instructions # 1.41 insn per cycle - 0.727401829 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.381856e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.386346e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.451907e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.498921 sec -INFO: No Floating Point Exceptions have been reported - 2,065,776,106 cycles # 2.820 GHz - 3,002,526,593 instructions # 1.45 insn per cycle - 0.789720140 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412608e+00 -Avg ME (F77/GPU) = 1.4132214346515752 -Relative difference = 0.00043425681546129636 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.479714e+04 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.492704e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.492704e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.641350 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.327802e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.344804e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.344804e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 4.947724 sec INFO: No Floating Point Exceptions have been reported - 18,978,826,784 cycles # 2.861 GHz - 58,704,221,037 instructions # 3.09 insn per cycle - 6.645410970 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) + 17,302,620,334 cycles # 3.496 GHz + 58,674,598,268 instructions # 3.39 insn per cycle + 4.949311337 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1024) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412986e+00 Avg ME (F77/C++) = 1.4129858051842916 Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.494310e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.651898e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.651898e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.950028 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.166408e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.187803e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.187803e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724758e+02 +- 2.665339e+02 ) GeV^-2 +TOTAL : 1.427590 sec INFO: No Floating Point Exceptions have been reported - 5,589,974,968 cycles # 2.862 GHz - 16,510,304,699 instructions # 2.95 insn per cycle - 1.954264273 seconds time elapsed + 4,998,564,906 cycles # 3.498 GHz + 16,503,246,104 instructions # 3.30 insn per cycle + 1.429196094 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5551) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412987e+00 Avg ME (F77/C++) = 1.4129865669244737 Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.496639e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.543532e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.543532e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 1.116418 sec +OMP threads / 
`nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.136581e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.209217e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.209217e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 +TOTAL : 0.789359 sec INFO: No Floating Point Exceptions have been reported - 2,975,820,242 cycles # 2.657 GHz - 6,633,799,194 instructions # 2.23 insn per cycle - 1.120575232 seconds time elapsed + 2,614,043,577 cycles # 3.306 GHz + 6,621,157,777 instructions # 2.53 insn per cycle + 0.790993368 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5568) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.615016e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.669374e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.669374e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 1.036246 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.275700e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.351645e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.351645e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 +TOTAL : 0.743100 sec INFO: No Floating Point Exceptions have been reported - 2,759,204,529 cycles # 2.654 GHz - 6,255,102,481 instructions # 2.27 insn per cycle - 1.040401186 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5279) (512y: 25) (512z: 0) + 2,460,811,549 cycles # 3.306 GHz + 6,244,546,313 instructions # 2.54 insn per cycle + 0.744691831 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5305) (512y: 11) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads 
/ `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.286831e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.322123e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.322123e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.300128 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.470657e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.656155e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.656155e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 +TOTAL : 0.495123 sec INFO: No Floating Point Exceptions have been reported - 2,231,395,652 cycles # 1.715 GHz - 3,699,704,768 instructions # 1.66 insn per cycle - 1.304305216 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2391) (512y: 29) (512z: 3970) + 1,640,921,568 cycles # 3.305 GHz + 3,669,320,401 instructions # 2.24 insn per cycle + 0.496729732 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2402) (512y: 9) (512z: 3969) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 18990368c8..fe8c55c044 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:38:52 +DATE: 2024-05-16_14:57:46 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.980776e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.047318e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.059891e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.469526 sec -INFO: No Floating Point Exceptions have been reported - 1,950,532,568 cycles # 2.815 GHz - 2,802,706,395 instructions # 1.44 insn per cycle - 0.749158155 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.120585e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.317479e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329114e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.606344 sec -INFO: No Floating Point Exceptions have been reported - 2,403,151,636 cycles # 2.824 GHz - 3,669,339,361 instructions # 1.53 insn per cycle - 0.910110717 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.348054e+04 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.359694e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.359694e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 7.005029 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.157731e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.172249e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.172249e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.214643 sec INFO: No Floating Point Exceptions have been reported - 20,055,951,018 cycles # 2.863 GHz - 60,536,467,053 instructions # 3.02 insn per cycle - 7.009312607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) + 18,242,480,003 cycles # 3.498 GHz + 60,528,165,591 instructions # 3.32 insn per cycle + 5.216356655 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1404) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.638770e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.684822e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.684822e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.556206 sec +OMP threads 
/ `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.323704e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.380967e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.380967e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.615582 sec INFO: No Floating Point Exceptions have been reported - 10,186,602,629 cycles # 2.862 GHz - 30,386,009,701 instructions # 2.98 insn per cycle - 3.560429335 seconds time elapsed + 9,151,647,030 cycles # 3.497 GHz + 30,371,965,788 instructions # 3.32 insn per cycle + 2.617287619 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5280) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.050822e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.223334e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.223334e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.833811 sec +OMP threads / 
`nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.309588e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.332285e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.332285e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.275161 sec INFO: No Floating Point Exceptions have been reported - 4,877,548,863 cycles # 2.655 GHz - 10,978,535,397 instructions # 2.25 insn per cycle - 1.838126466 seconds time elapsed + 4,218,419,523 cycles # 3.305 GHz + 10,962,823,763 instructions # 2.60 insn per cycle + 1.276815643 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4624) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.034701e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.056812e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.056812e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.607005 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.409942e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.436943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.436943e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.186720 sec INFO: No Floating Point Exceptions have been reported - 4,285,859,041 cycles # 2.661 GHz - 10,248,085,853 instructions # 2.39 insn per cycle - 1.611327735 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4280) (512y: 82) (512z: 0) + 3,926,467,492 cycles # 3.305 GHz + 10,288,751,023 instructions # 2.62 insn per cycle + 1.188400202 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4362) (512y: 48) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.675038e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.769490e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.769490e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.480681 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.835113e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.880105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.880105e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 0.917845 sec INFO: No Floating Point Exceptions have been reported - 4,211,204,679 cycles # 1.695 GHz - 6,044,041,090 instructions # 1.44 insn per cycle - 2.485018889 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2066) (512y: 117) (512z: 3540) + 3,037,911,511 cycles # 3.305 GHz + 5,995,105,780 instructions # 1.97 insn per cycle + 0.919563401 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2070) (512y: 85) (512z: 3537) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index bea6b18082..ea2522f723 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:39:18 +DATE: 2024-05-16_14:57:59 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.940348e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.041869e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.054764e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.469154 sec -INFO: No Floating Point Exceptions have been reported - 1,946,414,728 cycles # 2.818 GHz - 2,803,423,086 instructions # 1.44 insn per cycle - 0.748059256 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.116866e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.312173e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.323463e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.604157 sec -INFO: No Floating Point Exceptions have been reported - 2,374,249,289 cycles # 2.818 GHz - 3,602,148,119 instructions # 1.52 insn per cycle - 0.902621411 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.368504e+04 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.380280e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.380280e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.941323 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.201508e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.216429e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.216429e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.143575 sec INFO: No Floating Point Exceptions have been reported - 19,878,296,626 cycles # 2.863 GHz - 59,936,362,271 instructions # 3.02 insn per cycle - 6.945573140 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) + 17,998,527,032 cycles # 3.498 GHz + 59,876,955,320 instructions # 3.33 insn per cycle + 5.145249276 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1262) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.689994e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.736297e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.736297e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.516340 sec +OMP threads 
/ `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.348649e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.406269e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.406269e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.605142 sec INFO: No Floating Point Exceptions have been reported - 10,077,314,757 cycles # 2.863 GHz - 30,098,117,657 instructions # 2.99 insn per cycle - 3.520635536 seconds time elapsed + 9,114,931,082 cycles # 3.497 GHz + 30,051,323,374 instructions # 3.30 insn per cycle + 2.606780461 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5082) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.778247e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.940877e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.940877e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.889938 sec +OMP threads / 
`nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.251964e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.272881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.272881e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.332616 sec INFO: No Floating Point Exceptions have been reported - 5,023,754,472 cycles # 2.654 GHz - 11,483,522,538 instructions # 2.29 insn per cycle - 1.894205310 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4723) (512y: 0) (512z: 0) + 4,407,815,981 cycles # 3.304 GHz + 11,447,290,279 instructions # 2.60 insn per cycle + 1.334290522 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4717) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.644687e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.842226e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.842226e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.722080 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.341500e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.365518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.365518e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.245709 sec INFO: No Floating Point Exceptions have been reported - 4,590,091,342 cycles # 2.660 GHz - 10,809,457,257 instructions # 2.35 insn per cycle - 1.726406566 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4285) (512y: 234) (512z: 0) + 4,121,210,175 cycles # 3.305 GHz + 10,792,918,022 instructions # 2.62 insn per cycle + 1.247343751 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4317) (512y: 220) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.641517e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.735645e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.735645e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.492729 sec +OMP 
threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.840437e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885679e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885679e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 0.914759 sec INFO: No Floating Point Exceptions have been reported - 4,229,101,372 cycles # 1.695 GHz - 6,273,394,761 instructions # 1.48 insn per cycle - 2.496999493 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1961) (512y: 163) (512z: 3617) + 3,024,372,236 cycles # 3.301 GHz + 6,223,411,017 instructions # 2.06 insn per cycle + 0.916440024 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1968) (512y: 142) (512z: 3615) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index adf6424639..2c0f8eb99e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:39:44 +DATE: 2024-05-16_14:58:12 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.453895e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.477096e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.479397e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.529744 sec -INFO: No Floating Point Exceptions have been reported - 2,179,317,048 cycles # 2.822 GHz - 3,403,036,461 instructions # 1.56 insn per cycle - 0.830470867 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.124157e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.151338e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.152519e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.042150 sec -INFO: No Floating Point Exceptions have been reported - 9,405,604,432 cycles # 2.853 GHz - 20,118,562,201 instructions # 2.14 insn per cycle - 3.353608047 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.820592e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.821434e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.821434e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.018372 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.489421e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.490651e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.490651e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.595744 sec INFO: No Floating Point Exceptions have been reported - 25,614,013,948 cycles # 2.839 GHz - 78,938,013,495 instructions # 3.08 insn per cycle - 9.022664733 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 23,075,412,130 cycles # 3.498 GHz + 78,768,945,704 instructions # 3.41 insn per cycle + 6.597385330 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.519494e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.522699e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.522699e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
4.669138 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.824949e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.829515e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829515e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.406808 sec INFO: No Floating Point Exceptions have been reported - 12,898,966,245 cycles # 2.761 GHz - 39,280,150,365 instructions # 3.05 insn per cycle - 4.673492352 seconds time elapsed + 11,657,794,705 cycles # 3.421 GHz + 39,273,323,651 instructions # 3.37 insn per cycle + 3.408459486 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.859599e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.875346e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.875346e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.097013 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.090711e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092872e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092872e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.510847 sec INFO: No Floating Point Exceptions have been reported - 5,574,685,577 cycles # 2.655 GHz - 13,685,856,406 instructions # 2.46 insn per cycle - 2.101249976 seconds time elapsed + 4,990,714,987 cycles # 3.300 GHz + 13,680,690,805 instructions # 2.74 insn per cycle + 1.512580097 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.915800e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.935807e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.935807e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
1.848754 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.278297e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.281313e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.281313e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.290325 sec INFO: No Floating Point Exceptions have been reported - 4,887,101,603 cycles # 2.639 GHz - 12,341,123,817 instructions # 2.53 insn per cycle - 1.853060894 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) + 4,262,825,132 cycles # 3.300 GHz + 12,349,048,684 instructions # 2.90 insn per cycle + 1.292007253 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10349) (512y: 54) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.728417e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.739729e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.739729e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
2.446881 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.025518e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.033177e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.033177e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.817137 sec INFO: No Floating Point Exceptions have been reported - 4,107,098,137 cycles # 1.676 GHz - 6,336,202,498 instructions # 1.54 insn per cycle - 2.451096147 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) + 2,698,939,507 cycles # 3.297 GHz + 6,319,480,914 instructions # 2.34 insn per cycle + 0.818836639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1825) (512y: 70) (512z: 9372) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 92636e2555..4cd7fe1c8c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,246 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:02:53 +DATE: 2024-05-16_15:13:33 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.094987e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.434034e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.434034e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.523594 sec -INFO: No Floating Point Exceptions have been reported - 2,118,517,608 cycles # 2.813 GHz - 3,348,276,596 instructions # 1.58 insn per cycle - 0.813391390 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= -WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.622834e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.121853e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.121853e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.329713 sec -INFO: No Floating Point Exceptions have been reported - 10,291,111,145 cycles # 2.854 GHz - 21,714,903,322 instructions # 2.11 insn per cycle - 3.660758937 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.836126e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.837051e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.837051e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.946025 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.485473e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.486697e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.486697e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.608584 sec INFO: No Floating Point Exceptions have been reported - 25,625,027,072 cycles # 2.863 GHz - 78,943,584,564 instructions # 3.08 insn per cycle - 8.950491990 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 23,128,930,918 cycles # 3.499 GHz + 78,774,341,287 instructions # 3.41 insn per cycle + 6.610423185 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.512313e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.515690e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.515690e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.682891 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.828492e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.833038e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.833038e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.407449 sec INFO: No Floating Point Exceptions have been reported - 12,903,818,271 cycles # 2.754 GHz - 39,293,324,950 instructions # 3.05 insn per cycle - 4.687529036 seconds time elapsed + 11,665,541,287 cycles # 3.423 GHz + 39,286,544,282 instructions # 3.37 insn per cycle + 3.409284195 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.867831e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.884189e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.884189e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.098804 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.132223e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.134549e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.134549e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.458741 sec INFO: No Floating Point Exceptions have been reported - 5,587,651,201 cycles # 2.658 GHz - 13,696,262,775 instructions # 2.45 insn per cycle - 2.103410758 seconds time elapsed + 4,819,793,264 cycles # 3.300 GHz + 13,690,800,219 instructions # 2.84 insn per cycle + 1.460705015 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.952196e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.973818e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.973818e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.845912 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.277581e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.280637e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280637e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.293724 sec INFO: No Floating Point Exceptions have been reported - 4,903,860,646 cycles # 2.651 GHz - 12,352,108,328 instructions # 2.52 insn per cycle - 1.850421022 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) + 4,274,915,564 cycles # 3.300 GHz + 12,358,080,501 instructions # 2.89 insn per cycle + 1.295644687 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10349) (512y: 54) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.711524e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.723541e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.723541e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.457485 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.025590e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.033186e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.033186e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.819621 sec INFO: No Floating Point Exceptions have been reported - 4,130,677,154 cycles # 1.678 GHz - 6,346,127,118 instructions # 1.54 insn per cycle - 2.462055019 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) + 2,709,492,339 cycles # 3.299 GHz + 6,329,068,444 instructions # 2.34 insn per cycle + 0.821511880 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1825) (512y: 70) (512z: 9372) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 07bc3b6c73..0602847f29 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:12:42 +DATE: 2024-05-16_15:17:10 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.490501e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.518177e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.520849e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.515008 sec -INFO: No Floating Point Exceptions have been reported - 2,117,861,647 cycles # 2.847 GHz - 3,355,581,223 instructions # 1.58 insn per cycle - 0.805282012 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.120060e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.152876e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.154244e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.146209 sec -INFO: No Floating Point Exceptions have been reported - 9,794,350,225 cycles # 2.878 GHz - 20,567,996,876 instructions # 2.10 insn per cycle - 3.458179285 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.854249e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.855163e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.855163e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.490303e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.491536e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.491536e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.856220 sec +TOTAL : 6.593708 sec INFO: No Floating Point Exceptions have been reported - 25,606,958,110 cycles # 2.890 GHz - 78,936,876,492 instructions # 3.08 insn per cycle - 8.860490718 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 23,068,308,718 cycles # 3.498 GHz + 78,768,722,727 instructions # 3.41 insn per cycle + 6.595351939 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.547585e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.550823e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.550823e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.830448e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
4.835008e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.835008e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.633866 sec +TOTAL : 3.402743 sec INFO: No Floating Point Exceptions have been reported - 12,886,616,952 cycles # 2.779 GHz - 39,279,548,039 instructions # 3.05 insn per cycle - 4.638052623 seconds time elapsed + 11,658,227,607 cycles # 3.425 GHz + 39,273,680,832 instructions # 3.37 insn per cycle + 3.404413023 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.950793e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.966539e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.966539e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.132716e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.135051e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.135051e+04 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.073967 sec +TOTAL : 1.455134 sec INFO: No Floating Point Exceptions have been reported - 5,577,712,569 cycles # 2.685 GHz - 13,684,498,611 instructions # 2.45 insn per cycle - 2.078154877 seconds time elapsed + 4,806,609,884 cycles # 3.300 GHz + 13,680,092,385 instructions # 2.85 insn per cycle + 1.456788218 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.068596e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.089664e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.089664e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.278350e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.281359e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.281359e+04 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.820316 sec +TOTAL : 1.290262 sec INFO: No Floating Point Exceptions have been reported - 4,894,997,970 cycles # 2.684 GHz - 12,339,079,686 instructions # 2.52 insn per cycle - 1.824557454 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) + 4,262,565,757 cycles # 3.300 GHz + 12,349,006,230 instructions # 2.90 insn per cycle + 1.291919117 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10349) (512y: 54) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.817590e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.829323e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.829323e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.026526e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.034134e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.034134e+04 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.417091 sec +TOTAL : 0.816626 sec INFO: No Floating Point Exceptions have been reported - 4,131,104,953 cycles # 1.707 GHz - 6,332,486,091 instructions # 1.53 insn per cycle - 2.421265188 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) + 2,696,368,067 cycles # 3.296 GHz + 6,319,179,543 instructions # 2.34 insn per cycle + 0.818275093 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1825) (512y: 70) (512z: 9372) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index 0a65f9fefe..d568f8fe7a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -1,229 +1,128 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:09:55 +DATE: 2024-05-16_15:16:10 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.458490e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.485387e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.487802e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.511372 sec -INFO: No Floating Point Exceptions have been reported - 2,125,154,213 cycles # 2.818 GHz - 3,305,948,128 instructions # 1.56 insn per cycle - 0.811831996 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.112497e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.145168e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.146541e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.091906 sec -INFO: No Floating Point Exceptions have been reported - 9,555,297,501 cycles # 2.852 GHz - 20,467,928,496 instructions # 2.14 insn per cycle - 3.408325542 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.835837e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.836698e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.836698e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.943298 sec -INFO: No Floating Point Exceptions have been reported - 25,616,203,937 cycles # 2.864 GHz - 
78,941,981,933 instructions # 3.08 insn per cycle - 8.947377666 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe: Aborted + 4,205,713 cycles # 3.250 GHz + 6,350,306 instructions # 1.51 insn per cycle + 0.037928460 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.509219e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.512397e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.512397e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.682859 sec -INFO: No Floating Point Exceptions have been reported - 12,889,261,061 cycles # 2.751 GHz - 39,280,374,746 instructions # 3.05 insn per cycle - 4.687212544 seconds time elapsed +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe: Aborted + 4,091,396 cycles # 3.246 GHz + 6,357,121 instructions # 1.55 insn per cycle + 0.038128326 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.873155e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.889044e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.889044e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.092539 sec -INFO: No Floating Point Exceptions have been reported - 5,573,290,015 cycles # 2.659 GHz - 13,685,575,452 instructions # 2.46 insn per cycle - 2.096738730 seconds time elapsed +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe: Aborted + 4,461,358 cycles # 3.221 GHz + 6,356,202 instructions # 1.42 insn per cycle + 0.038835441 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.869803e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.890096e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.890096e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.858817 sec -INFO: No Floating Point Exceptions have been reported - 4,890,594,740 cycles # 2.626 GHz - 12,341,872,390 instructions # 2.52 insn per cycle - 1.863321950 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe: Aborted + 4,221,853 cycles # 3.251 GHz + 6,343,286 instructions # 1.50 insn per cycle + 0.039304724 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10349) (512y: 54) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.721792e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.733746e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.733746e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.449544 sec -INFO: No Floating Point Exceptions have been reported - 4,111,968,902 cycles # 1.676 GHz - 6,335,563,564 instructions # 1.54 insn per cycle - 2.453951471 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe: Aborted + 4,103,834 cycles # 3.247 GHz + 6,364,919 instructions # 1.55 insn per cycle + 0.038546564 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1825) (512y: 70) (512z: 9372) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index b300efd9c0..894e5462b0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,232 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:07:12 +DATE: 2024-05-16_15:15:34 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.175456e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.487401e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.489887e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.516778 sec -INFO: No Floating Point Exceptions have been reported - 2,106,838,284 cycles # 2.817 GHz - 3,334,047,065 instructions # 1.58 insn per cycle - 0.806903831 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.725415e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.181222e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.182613e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.208931 sec -INFO: No Floating Point Exceptions have been reported - 9,884,616,856 cycles # 2.852 GHz - 22,569,706,597 instructions # 2.28 insn per cycle - 3.521271497 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
1.838805e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.839662e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.839662e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.928794 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.490893e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.492126e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.492126e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.591741 sec INFO: No Floating Point Exceptions have been reported - 25,578,535,475 cycles # 2.864 GHz - 78,941,438,017 instructions # 3.09 insn per cycle - 8.932959256 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 23,066,107,487 cycles # 3.499 GHz + 78,768,756,399 instructions # 3.41 insn per cycle + 6.593333180 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.490750e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.493870e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.493870e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) 
GeV^-4 -TOTAL : 4.708141 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.837638e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.842159e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.842159e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.397502 sec INFO: No Floating Point Exceptions have been reported - 12,873,433,154 cycles # 2.733 GHz - 39,280,620,994 instructions # 3.05 insn per cycle - 4.712353785 seconds time elapsed + 11,654,233,176 cycles # 3.429 GHz + 39,272,830,844 instructions # 3.37 insn per cycle + 3.399090650 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.853620e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.869024e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.869024e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 
-TOTAL : 2.097850 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.129984e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.132298e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.132298e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.458580 sec INFO: No Floating Point Exceptions have been reported - 5,573,477,429 cycles # 2.652 GHz - 13,685,909,410 instructions # 2.46 insn per cycle - 2.102047066 seconds time elapsed + 4,817,959,562 cycles # 3.300 GHz + 13,680,105,128 instructions # 2.84 insn per cycle + 1.460248562 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.970623e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.991129e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.991129e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) 
GeV^-4 -TOTAL : 1.837793 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.278698e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.281715e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.281715e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.289991 sec INFO: No Floating Point Exceptions have been reported - 4,885,535,539 cycles # 2.653 GHz - 12,340,762,979 instructions # 2.53 insn per cycle - 1.841998870 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) + 4,261,761,014 cycles # 3.300 GHz + 12,348,842,997 instructions # 2.90 insn per cycle + 1.291662517 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10349) (512y: 54) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.715803e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.727367e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.727367e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) 
GeV^-4 -TOTAL : 2.451983 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.025908e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.033553e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.033553e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.816861 sec INFO: No Floating Point Exceptions have been reported - 4,110,713,398 cycles # 1.674 GHz - 6,334,867,690 instructions # 1.54 insn per cycle - 2.456147392 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) + 2,697,378,644 cycles # 3.297 GHz + 6,319,220,581 instructions # 2.34 insn per cycle + 0.818512324 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1825) (512y: 70) (512z: 9372) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 254c65fd8c..0454e77e5b 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:40:17 +DATE: 2024-05-16_14:58:27 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.472040e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.495257e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497568e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.530193 sec -INFO: No Floating Point Exceptions have been reported - 2,179,825,483 cycles # 2.820 GHz - 3,416,926,116 instructions # 1.57 insn per cycle - 0.832303660 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.149957e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.177471e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.178689e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.025570 sec -INFO: No Floating Point Exceptions have been reported - 9,343,829,120 cycles # 2.851 GHz - 20,017,847,921 instructions # 2.14 insn per cycle - 3.337093329 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.844549e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.845438e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.845438e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.900941 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.503400e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.504651e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.504651e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.559421 sec INFO: No Floating Point Exceptions have been reported - 25,492,945,375 cycles # 2.863 GHz - 78,715,017,784 instructions # 3.09 insn per cycle - 8.905151100 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4264) (avx2: 0) (512y: 0) (512z: 0) + 22,949,032,197 cycles # 3.498 GHz + 78,473,114,104 instructions # 3.42 insn per cycle + 6.560988078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4246) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.432714e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.435728e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.435728e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
4.786568 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.747589e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.751964e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.751964e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.461838 sec INFO: No Floating Point Exceptions have been reported - 12,968,671,480 cycles # 2.709 GHz - 39,227,279,421 instructions # 3.02 insn per cycle - 4.790848376 seconds time elapsed + 11,694,264,024 cycles # 3.377 GHz + 39,190,674,892 instructions # 3.35 insn per cycle + 3.463480087 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:12951) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.791500e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.806568e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.806568e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.114108 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.141112e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.143469e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.143469e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.444335 sec INFO: No Floating Point Exceptions have been reported - 5,617,875,214 cycles # 2.653 GHz - 13,801,216,605 instructions # 2.46 insn per cycle - 2.118326582 seconds time elapsed + 4,771,097,381 cycles # 3.300 GHz + 13,779,678,730 instructions # 2.89 insn per cycle + 1.446009503 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.808696e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.827867e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.827867e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
1.871093 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.265100e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.268090e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.268090e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.303635 sec INFO: No Floating Point Exceptions have been reported - 4,977,184,975 cycles # 2.656 GHz - 12,467,160,434 instructions # 2.50 insn per cycle - 1.875328468 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) + 4,306,878,290 cycles # 3.300 GHz + 12,450,724,323 instructions # 2.89 insn per cycle + 1.305387210 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10290) (512y: 226) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.708154e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.719459e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.719459e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
2.454314 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.025561e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.033157e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.033157e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.816874 sec INFO: No Floating Point Exceptions have been reported - 4,118,637,907 cycles # 1.676 GHz - 6,458,862,875 instructions # 1.57 insn per cycle - 2.458530246 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) + 2,699,603,241 cycles # 3.299 GHz + 6,439,988,180 instructions # 2.39 insn per cycle + 0.818482763 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1658) (512y: 182) (512z: 9374) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 452f4e853d..dfc931b6b1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:53:22 +DATE: 2024-05-16_15:08:10 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.253411e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.278108e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.280152e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.538797 sec -INFO: No Floating Point Exceptions have been reported - 2,198,780,840 cycles # 2.857 GHz - 3,392,092,682 instructions # 1.54 insn per cycle - 0.826434194 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.756018e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.782691e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.783822e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.316768 sec -INFO: No Floating Point Exceptions have been reported - 10,315,360,608 cycles # 2.881 GHz - 23,624,745,879 instructions # 2.29 insn per cycle - 3.638219909 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.179521e+02 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.179964e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.179964e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 39.248920 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.014138e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.014639e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.014639e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 32.715828 sec INFO: No Floating Point Exceptions have been reported - 113,511,319,041 cycles # 2.892 GHz - 144,820,446,927 instructions # 1.28 insn per cycle - 39.253177511 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21353) (avx2: 0) (512y: 0) (512z: 0) + 114,390,740,913 cycles # 3.496 GHz + 143,868,213,956 instructions # 1.26 insn per cycle + 32.717534500 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21197) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140450E-004 Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.047626e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.050057e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.050057e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.390362 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.957067e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.960134e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.960134e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 4.152140 sec INFO: No Floating Point Exceptions have been reported - 14,740,564,650 cycles # 2.733 GHz - 37,575,494,329 instructions # 2.55 insn per cycle - 5.394647902 seconds time elapsed + 14,524,143,789 cycles # 3.497 GHz + 37,569,009,758 instructions # 2.59 insn per cycle + 4.153813868 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:68119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.230737e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.243892e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.243892e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.278344 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.045014e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.059776e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.059776e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.820488 sec INFO: No Floating Point Exceptions have been reported - 6,134,003,628 cycles # 2.689 GHz - 13,061,930,844 instructions # 2.13 insn per cycle - 2.282738143 seconds time elapsed + 6,012,459,236 cycles # 3.300 GHz + 13,056,998,383 instructions # 2.17 insn per cycle + 1.822199241 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.779670e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.799133e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.799133e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
1.877722 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.099365e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.101589e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.101589e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.499142 sec INFO: No Floating Point Exceptions have been reported - 5,068,047,565 cycles # 2.694 GHz - 11,440,450,267 instructions # 2.26 insn per cycle - 1.882139324 seconds time elapsed + 4,951,930,645 cycles # 3.300 GHz + 11,434,500,619 instructions # 2.31 insn per cycle + 1.500826567 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.093705e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.106755e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.106755e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
2.321771 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.981779e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.989097e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989097e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.834999 sec INFO: No Floating Point Exceptions have been reported - 3,974,444,581 cycles # 1.709 GHz - 5,942,873,144 instructions # 1.50 insn per cycle - 2.326156002 seconds time elapsed + 2,755,403,907 cycles # 3.294 GHz + 5,932,997,077 instructions # 2.15 insn per cycle + 0.836712944 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 00ea23e18d..70bd8c4f55 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:54:29 +DATE: 2024-05-16_15:08:54 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.259147e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.284136e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.286360e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.536782 sec -INFO: No Floating Point Exceptions have been reported - 2,193,506,190 cycles # 2.857 GHz - 3,337,314,407 instructions # 1.52 insn per cycle - 0.824492176 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.761556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.788263e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.789425e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.301197 sec -INFO: No Floating Point Exceptions have been reported - 10,264,886,616 cycles # 2.886 GHz - 23,377,018,059 instructions # 2.28 insn per cycle - 3.615104997 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.170908e+02 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.171353e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.171353e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 39.330349 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.978402e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.978892e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.978892e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 32.950642 sec INFO: No Floating Point Exceptions have been reported - 113,688,017,774 cycles # 2.891 GHz - 144,788,018,158 instructions # 1.27 insn per cycle - 39.334720458 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20719) (avx2: 0) (512y: 0) (512z: 0) + 115,209,470,120 cycles # 3.496 GHz + 143,948,414,796 instructions # 1.25 insn per cycle + 32.952388317 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20527) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.974783e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.977013e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.977013e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
5.522774 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.845307e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.848117e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.848117e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 4.272369 sec INFO: No Floating Point Exceptions have been reported - 15,220,566,650 cycles # 2.755 GHz - 37,763,046,074 instructions # 2.48 insn per cycle - 5.527045303 seconds time elapsed + 14,943,641,234 cycles # 3.497 GHz + 37,756,143,792 instructions # 2.53 insn per cycle + 4.274057848 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:68447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.412795e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.426610e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.426610e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.222010 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.202487e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.218094e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.218094e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.789478 sec INFO: No Floating Point Exceptions have been reported - 6,000,419,836 cycles # 2.696 GHz - 12,896,174,142 instructions # 2.15 insn per cycle - 2.226315650 seconds time elapsed + 5,910,142,625 cycles # 3.300 GHz + 12,891,193,600 instructions # 2.18 insn per cycle + 1.791231685 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.743711e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.762861e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.762861e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
1.885038 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.087781e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.089982e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089982e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.515051 sec INFO: No Floating Point Exceptions have been reported - 5,086,798,971 cycles # 2.694 GHz - 11,447,968,989 instructions # 2.25 insn per cycle - 1.889284279 seconds time elapsed + 5,004,419,408 cycles # 3.300 GHz + 11,441,178,201 instructions # 2.29 insn per cycle + 1.516742615 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.141072e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.153903e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.153903e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
2.306249 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.012442e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.019869e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019869e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.822199 sec INFO: No Floating Point Exceptions have been reported - 3,947,559,408 cycles # 1.709 GHz - 5,896,754,674 instructions # 1.49 insn per cycle - 2.310527958 seconds time elapsed + 2,717,663,220 cycles # 3.300 GHz + 5,886,986,357 instructions # 2.17 insn per cycle + 0.823883432 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 15bbe59069..a84c1fb528 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:40:50 +DATE: 2024-05-16_14:58:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.326887e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.370559e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.376371e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.487782 sec -INFO: No Floating Point Exceptions have been reported - 1,984,813,926 cycles # 2.807 GHz - 2,933,686,219 instructions # 1.48 insn per cycle - 0.764328783 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.584549e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.644337e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.647136e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.720884 sec -INFO: No Floating Point Exceptions have been reported - 5,560,374,951 cycles # 2.849 GHz - 11,900,809,748 instructions # 2.14 insn per cycle - 2.008088048 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.909633e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.910547e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910547e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.596251 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.549079e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.550339e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.550339e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.440693 sec INFO: No Floating Point Exceptions have been reported - 24,624,004,022 cycles # 2.864 GHz - 78,129,381,217 instructions # 3.17 insn per cycle - 8.600293639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 22,534,025,954 cycles # 3.498 GHz + 78,108,281,721 instructions # 3.47 insn per cycle + 6.442208141 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.891953e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.904635e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.904635e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.387612 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.776589e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.795572e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.795572e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 1.683962 sec INFO: No Floating Point Exceptions have been reported - 6,469,659,104 cycles # 2.706 GHz - 20,120,611,338 instructions # 3.11 insn per cycle - 2.391816623 seconds time elapsed + 5,703,951,901 cycles # 3.385 GHz + 20,115,692,662 instructions # 3.53 insn per cycle + 1.685559639 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.562010e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.568248e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.568248e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.059004 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.257616e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.267567e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.267567e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.732747 sec INFO: No Floating Point Exceptions have been reported - 2,818,181,262 cycles # 2.654 GHz - 6,988,460,270 instructions # 2.48 insn per cycle - 1.063195979 seconds time elapsed + 2,422,509,141 cycles # 3.300 GHz + 6,983,673,080 instructions # 2.88 insn per cycle + 0.734334245 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.763183e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.771185e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.771185e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 
0.938662 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.510971e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.523230e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.523230e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.659531 sec INFO: No Floating Point Exceptions have been reported - 2,488,393,509 cycles # 2.641 GHz - 6,295,244,635 instructions # 2.53 insn per cycle - 0.942828770 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) + 2,180,863,659 cycles # 3.300 GHz + 6,297,386,644 instructions # 2.89 insn per cycle + 0.661145359 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10903) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.363218e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.368048e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.368048e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 
1.211358 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.089937e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.122434e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.122434e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.407286 sec INFO: No Floating Point Exceptions have been reported - 2,044,658,355 cycles # 1.683 GHz - 3,265,998,063 instructions # 1.60 insn per cycle - 1.215542758 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) + 1,347,738,835 cycles # 3.299 GHz + 3,256,621,391 instructions # 2.42 insn per cycle + 0.408802321 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2440) (512y: 14) (512z: 9577) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index e281ad389f..b381877daf 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,246 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:03:26 +DATE: 2024-05-16_15:13:49 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.615502e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.322427e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.322427e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.477115 sec -INFO: No Floating Point Exceptions have been reported - 1,936,349,619 cycles # 2.809 GHz - 2,877,179,431 instructions # 1.49 insn per cycle - 0.747561501 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.243623e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.556013e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.556013e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.907816 sec -INFO: No Floating Point Exceptions have been reported - 6,131,638,198 cycles # 2.845 GHz - 12,981,768,605 instructions # 2.12 insn per cycle - 2.213144159 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.909165e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.910120e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910120e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.600727 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.548514e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.549769e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.549769e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.443701 sec INFO: No Floating Point Exceptions have been reported - 24,637,778,479 cycles # 2.864 GHz - 78,132,610,249 instructions # 3.17 insn per cycle - 8.604942209 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 22,537,197,399 cycles # 3.497 GHz + 78,113,205,947 instructions # 3.47 insn per cycle + 6.445374920 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.457452e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.468775e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.468775e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.550311 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.794631e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.813778e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.813778e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 1.682840 sec INFO: No Floating Point Exceptions have been reported - 6,935,275,139 cycles # 2.716 GHz - 20,130,100,658 instructions # 2.90 insn per cycle - 2.554710358 seconds time elapsed + 5,714,389,513 cycles # 3.393 GHz + 20,124,926,959 instructions # 3.52 insn per cycle + 1.684605533 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.550561e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.557160e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.557160e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.068476 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.258274e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.268048e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.268048e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.734277 sec INFO: No Floating Point Exceptions have been reported - 2,830,711,742 cycles # 2.640 GHz - 6,997,830,070 instructions # 2.47 insn per cycle - 1.072903816 seconds time elapsed + 2,428,011,896 cycles # 3.300 GHz + 6,993,718,495 instructions # 2.88 insn per cycle + 0.735944609 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.772339e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.780808e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.780808e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.936387 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.510898e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.523096e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.523096e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.661358 sec INFO: No Floating Point Exceptions have been reported - 2,497,824,247 cycles # 2.658 GHz - 6,305,168,616 instructions # 2.52 insn per cycle - 0.940674173 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) + 2,187,351,220 cycles # 3.300 GHz + 6,306,890,918 instructions # 2.88 insn per cycle + 0.663074680 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10903) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.362852e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.367803e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.367803e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.214531 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.096744e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.129704e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.129704e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.408629 sec INFO: No Floating Point Exceptions have been reported - 2,054,265,568 cycles # 1.686 GHz - 3,276,400,100 instructions # 1.59 insn per cycle - 1.218830996 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) + 1,352,894,692 cycles # 3.300 GHz + 3,266,957,888 instructions # 2.41 insn per cycle + 0.410277153 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2440) (512y: 14) (512z: 9577) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 1c3846a692..06c6baa998 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:13:15 +DATE: 2024-05-16_15:17:26 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.362325e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.415082e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.420724e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.471632 sec -INFO: No Floating Point Exceptions have been reported - 1,968,514,245 cycles # 2.841 GHz - 2,902,581,432 instructions # 1.47 insn per cycle - 0.750206216 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.620229e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.693332e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.696842e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.810763 sec -INFO: No Floating Point Exceptions have been reported - 5,856,364,996 cycles # 2.870 GHz - 12,360,478,892 instructions # 2.11 insn per cycle - 2.100167053 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.921942e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.922884e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.922884e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.550538e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.551807e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.551807e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.542686 sec +TOTAL : 6.436920 sec INFO: No Floating Point Exceptions have been reported - 24,622,493,732 cycles # 2.881 GHz - 78,127,963,456 instructions # 3.17 insn per cycle - 8.546707601 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 22,525,491,366 cycles # 3.499 GHz + 78,108,478,651 instructions # 3.47 insn per cycle + 6.438477750 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.925135e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.937766e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.937766e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.781607e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
9.800773e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.800773e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.378023 sec +TOTAL : 1.683203 sec INFO: No Floating Point Exceptions have been reported - 6,481,719,151 cycles # 2.722 GHz - 20,120,720,773 instructions # 3.10 insn per cycle - 2.382079719 seconds time elapsed + 5,705,621,133 cycles # 3.387 GHz + 20,115,711,639 instructions # 3.53 insn per cycle + 1.684787201 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.581338e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.587855e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.587855e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.259703e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.269603e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.269603e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 1.046370 sec +TOTAL : 0.732175 sec INFO: No Floating Point Exceptions have been reported - 2,822,358,408 cycles # 2.688 GHz - 6,985,542,199 instructions # 2.48 insn per cycle - 1.050425346 seconds time elapsed + 2,420,845,321 cycles # 3.300 GHz + 6,983,845,118 instructions # 2.88 insn per cycle + 0.733740969 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.806198e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.814674e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.814674e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.511960e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.524145e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.524145e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.917862 sec +TOTAL : 0.659241 sec INFO: No Floating Point Exceptions have been reported - 2,496,110,223 cycles # 2.709 GHz - 6,293,657,033 instructions # 2.52 insn per cycle - 0.921934399 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) + 2,179,991,812 cycles # 3.300 GHz + 6,297,120,369 instructions # 2.89 insn per cycle + 0.660820449 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10903) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.393764e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.398765e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.398765e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.090242e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.123456e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.123456e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.186576 sec +TOTAL : 0.407437 sec INFO: No Floating Point Exceptions have been reported - 2,050,577,153 cycles # 1.723 GHz - 3,264,219,053 instructions # 1.59 insn per cycle - 1.190613213 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) + 1,346,315,275 cycles # 3.294 GHz + 3,256,520,218 instructions # 2.42 insn per cycle + 0.409011406 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2440) (512y: 14) (512z: 9577) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 97148e3ba7..22ac48e822 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -1,229 +1,128 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:10:28 +DATE: 2024-05-16_15:16:12 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.326390e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.378340e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.384052e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.468432 sec -INFO: No Floating Point Exceptions have been reported - 1,956,153,885 cycles # 2.820 GHz - 2,925,124,547 instructions # 1.50 insn per cycle - 0.750741002 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.616852e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.690868e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.694290e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.758835 sec -INFO: No Floating Point Exceptions have been reported - 5,694,632,258 cycles # 2.846 GHz - 12,170,382,669 instructions # 2.14 insn per cycle - 2.057387110 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.910170e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.911121e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.911121e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.594641 sec -INFO: No Floating Point Exceptions have been reported - 24,610,525,016 cycles # 2.863 GHz - 
78,132,278,540 instructions # 3.17 insn per cycle - 8.598723021 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe: Aborted + 4,029,925 cycles # 3.247 GHz + 6,334,889 instructions # 1.57 insn per cycle + 0.038588342 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.888566e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.900956e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.900956e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.388862 sec -INFO: No Floating Point Exceptions have been reported - 6,476,954,136 cycles # 2.708 GHz - 20,121,920,046 instructions # 3.11 insn per cycle - 2.393015096 seconds time elapsed +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe: Aborted + 4,252,444 cycles # 3.248 GHz + 6,370,939 instructions # 1.50 insn per cycle + 0.038168696 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.562187e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.568435e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.568435e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.057977 sec -INFO: No Floating Point Exceptions have been reported - 2,818,730,747 cycles # 2.656 GHz - 6,988,428,853 instructions # 2.48 insn per cycle - 1.062013241 seconds time elapsed +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe: Aborted + 4,035,262 cycles # 3.249 GHz + 6,367,716 instructions # 1.58 insn per cycle + 0.037515296 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.767100e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.775059e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.775059e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.936311 sec -INFO: No Floating Point Exceptions have been reported - 2,487,367,369 cycles # 2.647 GHz - 6,295,352,067 instructions # 2.53 insn per cycle - 0.940344403 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe: Aborted + 4,098,823 cycles # 3.249 GHz + 6,358,246 instructions # 1.55 insn per cycle + 0.038463032 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10903) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.354758e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.359575e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.359575e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.218960 sec -INFO: No Floating Point Exceptions have been reported - 2,057,435,423 cycles # 1.683 GHz - 3,266,628,935 instructions # 1.59 insn per cycle - 1.223151915 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe: Aborted + 4,050,326 cycles # 3.250 GHz + 6,355,795 instructions # 1.57 insn per cycle + 0.037850226 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2440) (512y: 14) (512z: 9577) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index dc12ca7aae..dd77f1c1e4 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,232 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:07:45 +DATE: 2024-05-16_15:15:50 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.747793e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.405382e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.411341e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.473580 sec -INFO: No Floating Point Exceptions have been reported - 1,929,031,590 cycles # 2.811 GHz - 2,902,080,173 instructions # 1.50 insn per cycle - 0.744461149 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.464876e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.690964e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.694375e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.841417 sec -INFO: No Floating Point Exceptions have been reported - 5,892,322,421 cycles # 2.846 GHz - 12,206,550,799 instructions # 2.07 insn per cycle - 2.128532659 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
1.911509e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.912427e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.912427e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.588000 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.550847e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.552104e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.552104e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.435838 sec INFO: No Floating Point Exceptions have been reported - 24,603,486,303 cycles # 2.864 GHz - 78,128,844,221 instructions # 3.18 insn per cycle - 8.592028071 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 22,523,261,605 cycles # 3.499 GHz + 78,108,255,985 instructions # 3.47 insn per cycle + 6.437369331 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.897521e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.909886e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.909886e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 
-TOTAL : 2.385730 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.783581e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.802686e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.802686e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 1.682828 sec INFO: No Floating Point Exceptions have been reported - 6,477,077,766 cycles # 2.711 GHz - 20,121,628,941 instructions # 3.11 insn per cycle - 2.389950461 seconds time elapsed + 5,704,990,484 cycles # 3.388 GHz + 20,115,695,758 instructions # 3.53 insn per cycle + 1.684404018 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.564279e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.570570e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.570570e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 
-TOTAL : 1.056604 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.258809e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.268753e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.268753e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.732400 sec INFO: No Floating Point Exceptions have been reported - 2,817,814,854 cycles # 2.658 GHz - 6,988,003,654 instructions # 2.48 insn per cycle - 1.060745031 seconds time elapsed + 2,421,755,654 cycles # 3.300 GHz + 6,983,836,255 instructions # 2.88 insn per cycle + 0.734028274 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.769248e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.777272e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.777272e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) 
GeV^-4 -TOTAL : 0.935150 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.511906e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.524099e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.524099e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.659359 sec INFO: No Floating Point Exceptions have been reported - 2,489,664,656 cycles # 2.652 GHz - 6,295,373,565 instructions # 2.53 insn per cycle - 0.939255376 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) + 2,180,577,486 cycles # 3.300 GHz + 6,297,266,641 instructions # 2.89 insn per cycle + 0.661018945 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10903) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.359069e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.363945e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.363945e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) 
GeV^-4 -TOTAL : 1.214973 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.090536e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.123527e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.123527e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.407424 sec INFO: No Floating Point Exceptions have been reported - 2,049,104,437 cycles # 1.682 GHz - 3,266,431,248 instructions # 1.59 insn per cycle - 1.219018056 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) + 1,346,438,406 cycles # 3.294 GHz + 3,256,919,958 instructions # 2.42 insn per cycle + 0.409008098 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2440) (512y: 14) (512z: 9577) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index fb9b3d5f50..fc36a399b8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:41:16 +DATE: 2024-05-16_14:58:54 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.355693e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.401036e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.406994e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.484635 sec -INFO: No Floating Point Exceptions have been reported - 1,972,569,534 cycles # 2.816 GHz - 2,939,499,932 instructions # 1.49 insn per cycle - 0.757402101 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.619113e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.679355e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.682149e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.713739 sec -INFO: No Floating Point Exceptions have been reported - 5,540,767,327 cycles # 2.848 GHz - 11,699,037,597 instructions # 2.11 insn per cycle - 2.001424634 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.914726e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.915647e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.915647e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.573445 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.574663e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.575966e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.575966e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.376595 sec INFO: No Floating Point Exceptions have been reported - 24,558,709,341 cycles # 2.863 GHz - 77,854,833,330 instructions # 3.17 insn per cycle - 8.577561930 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3114) (avx2: 0) (512y: 0) (512z: 0) + 22,304,285,207 cycles # 3.497 GHz + 77,752,971,522 instructions # 3.49 insn per cycle + 6.378130573 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3125) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866268634797E-004 -Relative difference = 5.630135835748959e-08 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863364631370E-004 +Relative difference = 5.076783822441729e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.975982e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.989277e+03 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.989277e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.359034 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.796224e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.815339e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.815339e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 1.680504 sec INFO: No Floating Point Exceptions have been reported - 6,427,044,365 cycles # 2.721 GHz - 20,086,102,386 instructions # 3.13 insn per cycle - 2.363343503 seconds time elapsed + 5,672,939,486 cycles # 3.373 GHz + 20,080,965,368 instructions # 3.54 insn per cycle + 1.682166460 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861465384638E-004 Relative difference = 2.211071647257023e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.504468e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.510257e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.510257e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.097904 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.181406e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.190597e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.190597e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.758068 sec INFO: No Floating Point Exceptions have been reported - 2,922,790,348 cycles # 2.654 GHz - 7,129,934,034 instructions # 2.44 insn per cycle - 1.101954791 seconds time elapsed + 2,506,172,888 cycles # 3.300 GHz + 7,125,446,367 instructions # 2.84 insn per cycle + 0.759654544 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668077068E-004 Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.699271e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.706623e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.706623e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.973114 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.425163e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.436500e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.436500e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.682581 sec INFO: No Floating Point Exceptions have been reported - 2,595,556,237 cycles # 2.658 GHz - 6,438,662,691 instructions # 2.48 insn per cycle - 0.977341866 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) + 2,257,025,798 cycles # 3.300 GHz + 6,436,084,408 instructions # 2.85 insn per cycle + 0.684182276 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11302) (512y: 13) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668077068E-004 Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.316379e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.321013e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.321013e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 
1.254012 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.899548e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.929377e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.929377e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.426829 sec INFO: No Floating Point Exceptions have been reported - 2,116,081,195 cycles # 1.683 GHz - 3,427,806,501 instructions # 1.62 insn per cycle - 1.258282002 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2924) (512y: 22) (512z: 9654) + 1,409,649,726 cycles # 3.293 GHz + 3,419,604,594 instructions # 2.43 insn per cycle + 0.428369002 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2934) (512y: 14) (512z: 9653) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952032322112E-004 Relative difference = 3.066639970473621e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 3f8f67a608..43b17e8e7f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:55:36 +DATE: 2024-05-16_15:09:39 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.546626e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.586411e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.590653e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.493388 sec -INFO: No Floating Point Exceptions have been reported - 2,052,942,224 cycles # 2.851 GHz - 3,071,897,705 instructions # 1.50 insn per cycle - 0.778091403 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.711232e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.769137e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.771784e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.862053 sec -INFO: No Floating Point Exceptions have been reported - 6,039,204,312 cycles # 2.872 GHz - 11,937,016,347 instructions # 1.98 insn per cycle - 2.158356809 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262660579844562E-004 -Relative difference = 2.836238137986709e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.454006e+02 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.454774e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.454774e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 30.077717 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.188091e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.188835e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.188835e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.204932e-01 +- 3.252405e-01 ) GeV^-4 +TOTAL : 26.510008 sec INFO: No Floating Point Exceptions have been reported - 86,228,096,895 cycles # 2.867 GHz - 135,581,749,205 instructions # 1.57 insn per cycle - 30.081848617 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15593) (avx2: 0) (512y: 0) (512z: 0) + 92,700,828,536 cycles # 3.497 GHz + 134,385,753,101 instructions # 1.45 insn per cycle + 26.511637845 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15522) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275351196781740E-004 -Relative difference = 1.805772034719401e-08 +Avg ME (F77/C++) = 6.6275351143091354E-004 +Relative difference = 1.7247609416495452e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.767198e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.779100e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.779100e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.432356 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.677211e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.692275e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.692275e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 +TOTAL : 1.896646 sec INFO: No Floating Point Exceptions have been reported - 6,776,462,064 cycles # 2.783 GHz - 19,386,992,522 instructions # 2.86 insn per cycle - 2.436630257 seconds time elapsed + 6,557,530,460 cycles # 3.455 GHz + 19,381,795,156 instructions # 2.96 insn per cycle + 1.898268655 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:69681) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274862707273868E-004 Relative difference = 4.0849182767952624e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.415254e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.420302e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.420302e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.167177 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.740924e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.746776e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.746776e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 0.948430 sec INFO: No Floating Point Exceptions have been reported - 3,174,327,264 cycles # 2.711 GHz - 6,807,988,001 instructions # 2.14 insn per cycle - 1.171487938 seconds time elapsed + 3,134,223,153 cycles # 3.300 GHz + 6,803,645,460 instructions # 2.17 insn per cycle + 0.950022327 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731558747466E-004 Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.702865e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.710950e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.710950e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 
0.971352 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.106465e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.115078e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.115078e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 0.785014 sec INFO: No Floating Point Exceptions have been reported - 2,641,359,018 cycles # 2.709 GHz - 5,985,956,533 instructions # 2.27 insn per cycle - 0.975633569 seconds time elapsed + 2,595,353,867 cycles # 3.300 GHz + 5,981,224,171 instructions # 2.30 insn per cycle + 0.786569307 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731558747466E-004 Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.382138e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.387143e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.387143e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 
1.195051 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.297777e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.318841e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.318841e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211848e-01 +- 3.254639e-01 ) GeV^-4 +TOTAL : 0.503806 sec INFO: No Floating Point Exceptions have been reported - 2,079,765,601 cycles # 1.735 GHz - 3,501,460,071 instructions # 1.68 insn per cycle - 1.199295448 seconds time elapsed + 1,665,883,909 cycles # 3.298 GHz + 3,493,968,719 instructions # 2.10 insn per cycle + 0.505369622 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5210) (512y: 3) (512z:44829) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627275e-04 Avg ME (F77/C++) = 6.6272750363879224E-004 Relative difference = 5.490631193034436e-09 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index f651d28060..4f88708a74 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:56:25 +DATE: 2024-05-16_15:10:12 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.511551e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.549792e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.554590e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.495451 sec -INFO: No Floating Point Exceptions have been reported - 2,081,587,427 cycles # 2.833 GHz - 3,058,350,902 instructions # 1.47 insn per cycle - 0.791962629 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.609535e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.664835e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.667390e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.882393 sec -INFO: No Floating Point Exceptions have been reported - 6,108,387,546 cycles # 2.880 GHz - 12,340,826,531 instructions # 2.02 insn per cycle - 2.177538628 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262660579844562E-004 -Relative difference = 2.836238137986709e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.501753e+02 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.502516e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.502516e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 29.816879 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.280786e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.281547e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.281547e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.204931e-01 +- 3.252404e-01 ) GeV^-4 +TOTAL : 26.118990 sec INFO: No Floating Point Exceptions have been reported - 86,332,810,324 cycles # 2.895 GHz - 136,005,056,328 instructions # 1.58 insn per cycle - 29.820915946 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15571) (avx2: 0) (512y: 0) (512z: 0) + 91,385,851,959 cycles # 3.499 GHz + 135,143,127,407 instructions # 1.48 insn per cycle + 26.120638728 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15420) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275348988418387E-004 -Relative difference = 1.5263316105958472e-08 +Avg ME (F77/C++) = 6.6275348903634818E-004 +Relative difference = 1.654257859275456e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.665500e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.677672e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.677672e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.468426 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.477331e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.491555e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.491555e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 +TOTAL : 1.941105 sec INFO: No Floating Point Exceptions have been reported - 6,845,942,231 cycles # 2.769 GHz - 19,438,050,467 instructions # 2.84 insn per cycle - 2.472841886 seconds time elapsed + 6,641,629,719 cycles # 3.419 GHz + 19,434,304,508 instructions # 2.93 insn per cycle + 1.942818126 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:69723) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274862764021530E-004 Relative difference = 4.170542995014107e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.417376e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.422501e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.422501e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.165327 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.794550e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.800752e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.800752e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 0.920220 sec INFO: No Floating Point Exceptions have been reported - 3,124,352,057 cycles # 2.673 GHz - 6,718,803,660 instructions # 2.15 insn per cycle - 1.169556736 seconds time elapsed + 3,041,217,956 cycles # 3.300 GHz + 6,714,527,084 instructions # 2.21 insn per cycle + 0.921825192 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731651051409E-004 Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.691458e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.698947e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.698947e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 
0.977716 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.123754e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.132315e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.132315e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 0.778442 sec INFO: No Floating Point Exceptions have been reported - 2,637,169,918 cycles # 2.688 GHz - 5,969,286,098 instructions # 2.26 insn per cycle - 0.981942660 seconds time elapsed + 2,573,375,562 cycles # 3.300 GHz + 5,964,316,953 instructions # 2.32 insn per cycle + 0.780039175 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731651051409E-004 Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.359514e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.364207e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.364207e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 
1.214640 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.311953e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.333717e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333717e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211848e-01 +- 3.254639e-01 ) GeV^-4 +TOTAL : 0.501662 sec INFO: No Floating Point Exceptions have been reported - 2,077,190,375 cycles # 1.705 GHz - 3,494,266,618 instructions # 1.68 insn per cycle - 1.219012886 seconds time elapsed + 1,656,457,681 cycles # 3.293 GHz + 3,487,542,284 instructions # 2.11 insn per cycle + 0.503287639 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4174) (512y: 4) (512z:44472) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627275e-04 Avg ME (F77/C++) = 6.6272750384530066E-004 Relative difference = 5.80223501432476e-09 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 19b36f52e3..4df23c6fce 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:41:41 +DATE: 2024-05-16_14:59:06 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.461685e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.485212e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.487571e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.529512 sec -INFO: No Floating Point Exceptions have been reported - 2,178,837,447 cycles # 2.822 GHz - 3,364,663,947 instructions # 1.54 insn per cycle - 0.830788537 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.126264e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.153475e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.154649e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.029127 sec -INFO: No Floating Point Exceptions have been reported - 9,370,132,482 cycles # 2.854 GHz - 19,961,685,193 instructions # 2.13 insn per cycle - 3.339329204 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266732376103494E-004 -Relative difference = 2.659538381540814e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.814801e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.815638e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.815638e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.046315 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.467395e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.468594e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.468594e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.654282 sec INFO: No Floating Point Exceptions have been reported - 25,898,843,827 cycles # 2.862 GHz - 79,438,691,532 instructions # 3.07 insn per cycle - 9.050548799 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4858) (avx2: 0) (512y: 0) (512z: 0) + 23,281,892,879 cycles # 3.498 GHz + 79,277,659,651 instructions # 3.41 insn per cycle + 6.655931634 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4801) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.427576e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.430583e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.430583e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
4.794109 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.734185e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.738541e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.738541e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.471801 sec INFO: No Floating Point Exceptions have been reported - 12,707,110,349 cycles # 2.649 GHz - 38,549,995,901 instructions # 3.03 insn per cycle - 4.798432518 seconds time elapsed + 11,461,390,223 cycles # 3.300 GHz + 38,542,705,401 instructions # 3.36 insn per cycle + 3.473446260 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13163) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.947160e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.962973e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.962973e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.073357 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.159663e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.162119e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.162119e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.421427 sec INFO: No Floating Point Exceptions have been reported - 5,517,673,615 cycles # 2.658 GHz - 13,479,814,632 instructions # 2.44 insn per cycle - 2.077628129 seconds time elapsed + 4,695,608,400 cycles # 3.300 GHz + 13,474,466,372 instructions # 2.87 insn per cycle + 1.423141317 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.092853e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.113453e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.113453e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.813039 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.283073e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.286139e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.286139e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.285513 sec INFO: No Floating Point Exceptions have been reported - 4,828,852,439 cycles # 2.658 GHz - 12,135,084,334 instructions # 2.51 insn per cycle - 1.817332368 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) + 4,247,114,233 cycles # 3.300 GHz + 12,145,146,492 instructions # 2.86 insn per cycle + 1.287225342 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10237) (512y: 45) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.671146e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.682190e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.682190e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.468744 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.009907e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.017373e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.017373e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.823451 sec INFO: No Floating Point Exceptions have been reported - 4,141,507,976 cycles # 1.676 GHz - 6,337,241,929 instructions # 1.53 insn per cycle - 2.472886901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1803) (512y: 93) (512z: 9358) + 2,715,458,356 cycles # 3.292 GHz + 6,302,170,709 instructions # 2.32 insn per cycle + 0.825158510 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1807) (512y: 61) (512z: 9355) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index 10c707e81e..54fe1ccb0f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:42:15 +DATE: 2024-05-16_14:59:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.482619e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.506518e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.508744e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.530792 sec -INFO: No Floating Point Exceptions have been reported - 2,175,803,522 cycles # 2.817 GHz - 3,378,965,043 instructions # 1.55 insn per cycle - 0.832396723 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.148343e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.175835e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.177034e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.021680 sec -INFO: No Floating Point Exceptions have been reported - 9,373,127,330 cycles # 2.855 GHz - 21,008,547,067 instructions # 2.24 insn per cycle - 3.339100414 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266732376103494E-004 -Relative difference = 2.659538381540814e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.816314e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.817146e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.817146e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.038908 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.464129e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.465346e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.465346e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.663279 sec INFO: No Floating Point Exceptions have been reported - 25,883,565,851 cycles # 2.863 GHz - 79,454,182,113 instructions # 3.07 insn per cycle - 9.043041112 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4505) (avx2: 0) (512y: 0) (512z: 0) + 23,317,642,584 cycles # 3.499 GHz + 79,208,074,098 instructions # 3.40 insn per cycle + 6.664923308 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4465) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.444359e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.447378e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.447378e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
4.770841 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.686297e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.690537e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.690537e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.506677 sec INFO: No Floating Point Exceptions have been reported - 12,673,930,469 cycles # 2.656 GHz - 38,521,208,960 instructions # 3.04 insn per cycle - 4.775036357 seconds time elapsed + 11,576,560,337 cycles # 3.300 GHz + 38,482,658,385 instructions # 3.32 insn per cycle + 3.508299179 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:12930) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.869925e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.885399e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.885399e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.093382 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.148090e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.150478e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.150478e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.436063 sec INFO: No Floating Point Exceptions have been reported - 5,571,627,209 cycles # 2.657 GHz - 13,607,217,607 instructions # 2.44 insn per cycle - 2.097652206 seconds time elapsed + 4,742,292,128 cycles # 3.300 GHz + 13,586,331,325 instructions # 2.86 insn per cycle + 1.437759750 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.920636e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.941759e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.941759e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.847928 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.271453e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.274430e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274430e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.297103 sec INFO: No Floating Point Exceptions have been reported - 4,911,801,030 cycles # 2.653 GHz - 12,271,296,407 instructions # 2.50 insn per cycle - 1.852091714 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) + 4,285,298,964 cycles # 3.300 GHz + 12,257,011,991 instructions # 2.86 insn per cycle + 1.298819689 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10175) (512y: 225) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.658898e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.670376e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.670376e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.472418 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.997853e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.005253e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.005253e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.828105 sec INFO: No Floating Point Exceptions have been reported - 4,148,038,447 cycles # 1.675 GHz - 6,442,551,576 instructions # 1.55 insn per cycle - 2.476725391 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1628) (512y: 191) (512z: 9356) + 2,734,224,299 cycles # 3.296 GHz + 6,405,345,797 instructions # 2.34 insn per cycle + 0.829773255 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1639) (512y: 181) (512z: 9355) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 5b5bd116a3..fc32cd9504 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:44:06 +DATE: 2024-05-16_15:00:02 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.065678e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.066059e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.066269e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.433623 sec -INFO: No Floating Point Exceptions have been reported - 7,848,341,490 cycles # 2.847 GHz - 17,462,165,188 instructions # 2.22 insn per cycle - 2.813787845 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.279047e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.280919e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.281197e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.985795 sec -INFO: No Floating Point Exceptions have been reported - 12,317,841,072 cycles # 2.855 GHz - 29,065,647,551 instructions # 2.36 insn per cycle - 4.369250222 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.769533e+01 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.769744e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.769744e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.808252 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.974428e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.974682e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.974682e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.296181 sec INFO: No Floating Point Exceptions have been reported - 18,798,528,369 cycles # 2.760 GHz - 53,916,630,138 instructions # 2.87 insn per cycle - 6.812355714 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) + 18,531,410,716 cycles # 3.498 GHz + 53,656,558,692 instructions # 2.90 insn per cycle + 5.297695109 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32534) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.539980e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.540062e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.540062e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 3.433152 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.903537e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.903625e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.903625e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.776054 sec INFO: No Floating Point Exceptions have been reported - 9,799,231,624 cycles # 2.852 GHz - 27,092,581,938 instructions # 2.76 insn per cycle - 3.437235180 seconds time elapsed + 9,714,685,657 cycles # 3.498 GHz + 27,085,095,455 instructions # 2.79 insn per cycle + 2.777591504 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.326889e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.327284e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.327284e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 1.592072 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.267474e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.267897e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.267897e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.239278 sec INFO: No Floating Point Exceptions have been reported - 4,220,179,984 cycles # 2.645 GHz - 9,560,887,701 instructions # 2.27 insn per cycle - 1.596045022 seconds time elapsed + 4,093,982,090 cycles # 3.300 GHz + 9,554,590,072 instructions # 2.33 insn per cycle + 1.240820628 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.770010e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.770613e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.770613e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 1.405649 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.917663e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.918229e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.918229e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.075663 sec INFO: No Floating Point Exceptions have been reported - 3,726,923,548 cycles # 2.645 GHz - 8,484,897,516 instructions # 2.28 insn per cycle - 1.409716339 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) + 3,553,974,436 cycles # 3.300 GHz + 8,479,633,680 instructions # 2.39 insn per cycle + 1.077213977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80074) (512y: 57) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.281739e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.282258e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.282258e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 1.614138 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.603814e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.605597e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.605597e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.616422 sec INFO: No Floating Point Exceptions have been reported - 2,690,163,143 cycles # 1.663 GHz - 4,272,866,756 instructions # 1.59 insn per cycle - 1.618172762 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) + 2,035,813,701 cycles # 3.296 GHz + 4,263,524,895 instructions # 2.09 insn per cycle + 0.617919481 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2288) (512y: 73) (512z:79102) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 1be1cfeedf..3fce28479c 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,246 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:03:52 +DATE: 2024-05-16_15:14:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.063403e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.064386e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.064386e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.395260 sec -INFO: No Floating Point Exceptions have been reported - 7,755,019,436 cycles # 2.852 GHz - 17,230,726,903 instructions # 2.22 insn per cycle - 2.775067655 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.263412e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.297943e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.297943e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.971807 sec -INFO: No Floating Point Exceptions have been reported - 12,283,103,403 cycles # 2.855 GHz - 27,758,308,143 instructions # 2.26 insn per cycle - 4.357937638 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.361041e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.361236e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.361236e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.177191 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.977506e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.977760e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.977760e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.294370 sec INFO: No Floating Point Exceptions have been reported - 18,941,402,140 cycles # 2.638 GHz - 53,918,413,850 instructions # 2.85 insn per cycle - 7.181202320 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) + 18,525,569,019 cycles # 3.498 GHz + 53,657,433,656 instructions # 2.90 insn per cycle + 5.295871836 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32534) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.538220e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.538308e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.538308e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.437404 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.904848e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.904938e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.904938e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.774505 sec INFO: No Floating Point Exceptions have been reported - 9,826,263,323 cycles # 2.856 GHz - 27,093,421,705 instructions # 2.76 insn per cycle - 3.441586183 seconds time elapsed + 9,711,671,203 cycles # 3.499 GHz + 27,086,160,123 instructions # 2.79 insn per cycle + 2.775998605 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.320877e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.321321e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.321321e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.595267 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.268708e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.269153e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.269153e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.238971 sec INFO: No Floating Point Exceptions have been reported - 4,226,325,559 cycles # 2.644 GHz - 9,562,000,988 instructions # 2.26 insn per cycle - 1.599357751 seconds time elapsed + 4,093,903,617 cycles # 3.300 GHz + 9,555,873,980 instructions # 2.33 insn per cycle + 1.240497644 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.768698e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.769266e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.769266e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.406299 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.921180e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.921761e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.921761e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.074913 sec INFO: No Floating Point Exceptions have been reported - 3,728,202,948 cycles # 2.645 GHz - 8,485,828,873 instructions # 2.28 insn per cycle - 1.410433353 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) + 3,551,316,418 cycles # 3.300 GHz + 8,480,411,726 instructions # 2.39 insn per cycle + 1.076454882 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80074) (512y: 57) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.279021e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.279511e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.279511e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.615850 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.564439e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.566215e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.566215e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.619417 sec INFO: No Floating Point Exceptions have been reported - 2,693,497,833 cycles # 1.663 GHz - 4,273,840,765 instructions # 1.59 insn per cycle - 1.620067219 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) + 2,045,719,951 cycles # 3.296 GHz + 4,264,571,385 instructions # 2.08 insn per cycle + 0.620945943 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2288) (512y: 73) (512z:79102) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 3f519fda03..bd409ca461 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:45:01 +DATE: 2024-05-16_15:00:24 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.065431e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.065819e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.066015e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.431749 sec -INFO: No Floating Point Exceptions have been reported - 7,858,443,167 cycles # 2.852 GHz - 17,797,449,482 instructions # 2.26 insn per cycle - 2.810886675 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.189050e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.190883e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.191132e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.007219 sec -INFO: No Floating Point Exceptions have been reported - 12,388,147,716 cycles # 2.856 GHz - 29,572,084,158 instructions # 2.39 insn per cycle - 4.393669645 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.555801e+01 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.556015e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.556015e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.995892 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.001907e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.001932e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001932e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.272707 sec INFO: No Floating Point Exceptions have been reported - 18,868,341,179 cycles # 2.696 GHz - 53,930,114,085 instructions # 2.86 insn per cycle - 6.999840535 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32063) (avx2: 0) (512y: 0) (512z: 0) + 18,450,607,097 cycles # 3.499 GHz + 53,658,226,288 instructions # 2.91 insn per cycle + 5.274217478 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32178) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.547899e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.547983e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.547983e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 3.415669 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.912922e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.913010e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.913010e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.762771 sec INFO: No Floating Point Exceptions have been reported - 9,762,163,827 cycles # 2.856 GHz - 27,089,755,364 instructions # 2.77 insn per cycle - 3.419663266 seconds time elapsed + 9,669,007,974 cycles # 3.499 GHz + 27,071,852,156 instructions # 2.80 insn per cycle + 2.764282785 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96286) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.328520e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.328923e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.328923e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 1.591769 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.249552e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.249989e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.249989e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.244153 sec INFO: No Floating Point Exceptions have been reported - 4,217,350,816 cycles # 2.647 GHz - 9,560,856,496 instructions # 2.27 insn per cycle - 1.595749154 seconds time elapsed + 4,109,934,644 cycles # 3.300 GHz + 9,549,086,138 instructions # 2.32 insn per cycle + 1.245663580 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.765235e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.765802e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.765802e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 1.407302 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.860053e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.860597e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.860597e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.088189 sec INFO: No Floating Point Exceptions have been reported - 3,737,969,275 cycles # 2.650 GHz - 8,484,674,655 instructions # 2.27 insn per cycle - 1.411406372 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) + 3,595,363,738 cycles # 3.300 GHz + 8,473,870,494 instructions # 2.36 insn per cycle + 1.089783231 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80046) (512y: 227) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.273845e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.274322e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.274322e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 1.617639 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.547370e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.549049e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.549049e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.620509 sec INFO: No Floating Point Exceptions have been reported - 2,695,774,477 cycles # 1.663 GHz - 4,276,120,388 instructions # 1.59 insn per cycle - 1.621698890 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) + 2,049,145,259 cycles # 3.296 GHz + 4,264,452,480 instructions # 2.08 insn per cycle + 0.621971347 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2180) (512y: 177) (512z:79109) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 8097294660..816b77a603 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:45:55 +DATE: 2024-05-16_15:00:45 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.560287e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.561087e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.561509e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.688313 sec -INFO: No Floating Point Exceptions have been reported - 5,610,919,333 cycles # 2.843 GHz - 12,076,970,192 instructions # 2.15 insn per cycle - 2.032164963 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.335524e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.336187e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.336332e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.920912 sec -INFO: No Floating Point Exceptions have been reported - 6,262,064,127 cycles # 2.846 GHz - 13,866,454,713 instructions # 2.21 insn per cycle - 2.256561773 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849636e-03 -Avg ME (F77/GPU) = 9.8712405367667715E-003 -Relative difference = 0.0021934350433631634 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.473644e+01 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.473896e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.473896e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.236471 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.035543e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.035570e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.035570e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 +TOTAL : 5.101302 sec INFO: No Floating Point Exceptions have been reported - 17,834,532,335 cycles # 2.858 GHz - 53,589,179,622 instructions # 3.00 insn per cycle - 6.240522901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) + 17,846,801,084 cycles # 3.498 GHz + 53,421,326,194 instructions # 2.99 insn per cycle + 5.102780488 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20403) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087541066E-003 -Relative difference = 2.1197698286506752e-08 +Avg ME (F77/C++) = 9.8479612087545108E-003 +Relative difference = 2.11977393295785e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.311314e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.311704e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.311704e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.599839 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.202236e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.202683e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.202683e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 +TOTAL : 1.258815 sec INFO: No Floating Point Exceptions have been reported - 4,578,829,094 cycles # 2.856 GHz - 13,761,810,246 instructions # 3.01 insn per cycle - 1.603811766 seconds time elapsed + 4,406,545,916 cycles # 3.497 GHz + 13,755,398,365 instructions # 3.12 insn per cycle + 1.260309156 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896527003E-003 Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.636666e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.638274e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.638274e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL 
: 0.800488 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.467032e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.468910e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.468910e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.625447 sec INFO: No Floating Point Exceptions have been reported - 2,129,570,848 cycles # 2.649 GHz - 4,816,093,977 instructions # 2.26 insn per cycle - 0.804523713 seconds time elapsed + 2,068,444,810 cycles # 3.300 GHz + 4,810,294,241 instructions # 2.33 insn per cycle + 0.626978760 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.627354e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.629498e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.629498e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 
-TOTAL : 0.697299 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.742997e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.745348e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.745348e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.543809 sec INFO: No Floating Point Exceptions have been reported - 1,857,131,979 cycles # 2.651 GHz - 4,273,320,598 instructions # 2.30 insn per cycle - 0.701213399 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) + 1,798,807,079 cycles # 3.300 GHz + 4,267,865,954 instructions # 2.37 insn per cycle + 0.545350839 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80691) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.540089e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.542023e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.542023e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 
-TOTAL : 0.812936 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.712642e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.713378e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.713378e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 +TOTAL : 0.310824 sec INFO: No Floating Point Exceptions have been reported - 1,360,618,833 cycles # 1.668 GHz - 2,159,125,772 instructions # 1.59 insn per cycle - 0.816997353 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) + 1,027,302,503 cycles # 3.292 GHz + 2,150,832,502 instructions # 2.09 insn per cycle + 0.312380920 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2903) (512y: 17) (512z:79304) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982958280E-003 Relative difference = 2.0044092642523172e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 6d352d97ac..269ddf269d 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,246 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:04:47 +DATE: 2024-05-16_15:14:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.582684e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.584567e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.584567e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.633735 sec -INFO: No Floating Point Exceptions have been reported - 5,453,177,396 cycles # 2.847 GHz - 11,627,188,509 instructions # 2.13 insn per cycle - 1.971399165 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.292389e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.306150e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.306150e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.932378 sec -INFO: No Floating Point Exceptions have been reported - 6,313,737,946 cycles # 2.848 GHz - 13,568,150,990 instructions # 2.15 insn per cycle - 2.274068662 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849636e-03 -Avg ME (F77/GPU) = 9.8712405367667715E-003 -Relative difference = 0.0021934350433631634 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.447529e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.447785e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.447785e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.255519 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.035650e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.035679e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.035679e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 +TOTAL : 5.100801 sec INFO: No Floating Point Exceptions have been reported - 17,871,844,477 cycles # 2.856 GHz - 53,590,423,890 instructions # 3.00 insn per cycle - 6.259496797 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) + 17,851,268,457 cycles # 3.499 GHz + 53,422,135,092 instructions # 2.99 insn per cycle + 5.102295233 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20403) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087541066E-003 -Relative difference = 2.1197698286506752e-08 +Avg ME (F77/C++) = 9.8479612087545108E-003 +Relative difference = 2.11977393295785e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.319002e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.319403e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.319403e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.595862 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.202973e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.203448e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.203448e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 +TOTAL : 1.259077 sec INFO: No Floating Point Exceptions have been reported - 4,573,738,949 cycles # 2.860 GHz - 13,762,785,828 instructions # 3.01 insn per cycle - 1.599904345 seconds time elapsed + 4,407,381,529 cycles # 3.497 GHz + 13,756,329,596 instructions # 3.12 insn per cycle + 1.260579726 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896527003E-003 Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.613525e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.615218e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.615218e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.804350 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.454907e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.456799e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.456799e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.626446 sec INFO: No Floating Point Exceptions have been reported - 2,139,167,872 cycles # 2.648 GHz - 4,817,111,626 instructions # 2.25 insn per cycle - 0.808508083 seconds time elapsed + 2,071,381,535 cycles # 3.300 GHz + 4,811,292,358 instructions # 2.32 insn per cycle + 0.627929179 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.603124e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.605216e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.605216e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.699524 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.733338e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.735696e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.735696e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.544424 sec INFO: No Floating Point Exceptions have been reported - 1,862,402,974 cycles # 2.650 GHz - 4,274,167,467 instructions # 2.29 insn per cycle - 0.703628099 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) + 1,800,615,366 cycles # 3.300 GHz + 4,268,858,978 instructions # 2.37 insn per cycle + 0.545874505 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80691) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.580110e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.582197e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.582197e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.807828 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.708326e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.709082e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.709082e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 +TOTAL : 0.311599 sec INFO: No Floating Point Exceptions have been reported - 1,354,037,726 cycles # 1.669 GHz - 2,159,114,420 instructions # 1.59 insn per cycle - 0.811949308 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) + 1,028,673,441 cycles # 3.288 GHz + 2,151,830,707 instructions # 2.09 insn per cycle + 0.313093887 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2903) (512y: 17) (512z:79304) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982958280E-003 Relative difference = 2.0044092642523172e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 2d3f5a3740..2f40c9b08b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:46:34 +DATE: 2024-05-16_15:01:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.539024e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.539847e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.540280e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.689103 sec -INFO: No Floating Point Exceptions have been reported - 5,627,474,622 cycles # 2.847 GHz - 11,923,534,222 instructions # 2.12 insn per cycle - 2.035228412 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.303686e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.304329e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.304469e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.932451 sec -INFO: No Floating Point Exceptions have been reported - 6,311,455,519 cycles # 2.848 GHz - 13,762,708,375 instructions # 2.18 insn per cycle - 2.272906437 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849636e-03 -Avg ME (F77/GPU) = 9.8712405367667715E-003 -Relative difference = 0.0021934350433631634 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.477402e+01 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.477656e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.477656e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.233047 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.038056e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.038085e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.038085e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 +TOTAL : 5.089824 sec INFO: No Floating Point Exceptions have been reported - 17,803,580,317 cycles # 2.855 GHz - 53,580,069,164 instructions # 3.01 insn per cycle - 6.237030677 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) + 17,811,569,824 cycles # 3.498 GHz + 53,427,844,878 instructions # 3.00 insn per cycle + 5.091356551 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20415) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087582491E-003 -Relative difference = 2.1198118933954545e-08 +Avg ME (F77/C++) = 9.8479612087257751E-003 +Relative difference = 2.119482139617284e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.307415e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.307805e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 3.307805e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.601039 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.248857e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.249314e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.249314e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 +TOTAL : 1.245315 sec INFO: No Floating Point Exceptions have been reported - 4,572,009,891 cycles # 2.850 GHz - 13,755,353,111 instructions # 3.01 insn per cycle - 1.605120576 seconds time elapsed + 4,359,180,439 cycles # 3.497 GHz + 13,748,783,981 instructions # 3.15 insn per cycle + 1.246825643 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896225560E-003 Relative difference = 3.151694379513441e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.585961e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.587683e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.587683e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL 
: 0.806516 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.483628e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.485527e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.485527e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.624141 sec INFO: No Floating Point Exceptions have been reported - 2,141,220,761 cycles # 2.644 GHz - 4,818,439,860 instructions # 2.25 insn per cycle - 0.810543510 seconds time elapsed + 2,063,901,123 cycles # 3.300 GHz + 4,812,481,687 instructions # 2.33 insn per cycle + 0.625624461 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070967E-003 Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.583228e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.585349e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.585349e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 
-TOTAL : 0.701072 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.780575e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.782924e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.782924e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.541603 sec INFO: No Floating Point Exceptions have been reported - 1,870,651,613 cycles # 2.656 GHz - 4,275,203,774 instructions # 2.29 insn per cycle - 0.705038579 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) + 1,791,446,942 cycles # 3.300 GHz + 4,269,554,140 instructions # 2.38 insn per cycle + 0.543134521 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81101) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070967E-003 Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.570140e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.572065e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.572065e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 
-TOTAL : 0.808794 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.714704e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.715434e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.715434e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 +TOTAL : 0.310403 sec INFO: No Floating Point Exceptions have been reported - 1,356,929,556 cycles # 1.671 GHz - 2,164,613,956 instructions # 1.60 insn per cycle - 0.812781092 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3487) (512y: 34) (512z:79499) + 1,025,275,826 cycles # 3.290 GHz + 2,157,343,591 instructions # 2.10 insn per cycle + 0.311929273 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3497) (512y: 26) (512z:79498) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982955140E-003 Relative difference = 2.0044060904369713e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index dfab5870bc..1b5ab77bc1 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:47:13 +DATE: 2024-05-16_15:01:17 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.689455e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.689959e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.690212e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.179203 sec -INFO: No Floating Point Exceptions have been reported - 7,126,539,551 cycles # 2.849 GHz - 15,807,759,758 instructions # 2.22 insn per cycle - 2.559686036 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.107889e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.108154e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.108192e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.424856 sec -INFO: No Floating Point Exceptions have been reported - 10,729,635,772 cycles # 2.852 GHz - 25,204,058,412 instructions # 2.35 insn per cycle - 3.820430433 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.303415e+01 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.303607e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.303607e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.238072 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.859763e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.860008e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.860008e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.357830 sec INFO: No Floating Point Exceptions have been reported - 19,150,406,884 cycles # 2.645 GHz - 54,154,394,762 instructions # 2.83 insn per cycle - 7.242308052 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32067) (avx2: 0) (512y: 0) (512z: 0) + 18,745,138,030 cycles # 3.498 GHz + 53,894,742,330 instructions # 2.88 insn per cycle + 5.359343965 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32196) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.497154e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.497235e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.497235e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 
-TOTAL : 3.531757 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.989274e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.989371e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989371e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.657296 sec INFO: No Floating Point Exceptions have been reported - 9,343,938,644 cycles # 2.643 GHz - 26,158,830,842 instructions # 2.80 insn per cycle - 3.535758073 seconds time elapsed + 9,296,382,189 cycles # 3.497 GHz + 26,144,686,060 instructions # 2.81 insn per cycle + 2.658817676 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96007) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.453828e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.454275e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.454275e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL 
: 1.534198 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.475685e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.476158e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.476158e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.181640 sec INFO: No Floating Point Exceptions have been reported - 4,069,691,610 cycles # 2.648 GHz - 9,228,168,046 instructions # 2.27 insn per cycle - 1.538179495 seconds time elapsed + 3,903,384,014 cycles # 3.300 GHz + 9,214,863,642 instructions # 2.36 insn per cycle + 1.183100156 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.986335e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.986931e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.986931e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 
-TOTAL : 1.329366 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.128514e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.129121e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.129121e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.031444 sec INFO: No Floating Point Exceptions have been reported - 3,528,184,184 cycles # 2.647 GHz - 8,174,614,993 instructions # 2.32 insn per cycle - 1.333451918 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) + 3,407,965,790 cycles # 3.300 GHz + 8,163,219,670 instructions # 2.40 insn per cycle + 1.032920025 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79927) (512y: 45) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.367967e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.368468e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.368468e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 
-TOTAL : 1.573046 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.856196e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.858028e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.858028e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.598999 sec INFO: No Floating Point Exceptions have been reported - 2,618,946,865 cycles # 1.661 GHz - 4,154,480,374 instructions # 1.59 insn per cycle - 1.577054610 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2046) (512y: 93) (512z:78760) + 1,975,737,268 cycles # 3.292 GHz + 4,145,284,175 instructions # 2.10 insn per cycle + 0.600485726 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2050) (512y: 61) (512z:78757) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 3ddfb4805b..28f52b50f2 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:48:06 +DATE: 2024-05-16_15:01:38 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.679279e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.679786e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.680054e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.178048 sec -INFO: No Floating Point Exceptions have been reported - 7,134,880,740 cycles # 2.851 GHz - 15,434,594,866 instructions # 2.16 insn per cycle - 2.558453633 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.104221e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.104483e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.104525e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.411600 sec -INFO: No Floating Point Exceptions have been reported - 10,672,973,002 cycles # 2.855 GHz - 24,521,846,399 instructions # 2.30 insn per cycle - 3.794724712 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.893217e+01 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.893439e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.893439e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.694276 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.878190e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.878445e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.878445e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.347497 sec INFO: No Floating Point Exceptions have been reported - 19,121,414,788 cycles # 2.855 GHz - 54,156,458,090 instructions # 2.83 insn per cycle - 6.698138270 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32244) (avx2: 0) (512y: 0) (512z: 0) + 18,713,748,882 cycles # 3.499 GHz + 53,885,198,082 instructions # 2.88 insn per cycle + 5.348966480 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32348) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.495395e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.495480e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.495480e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 
-TOTAL : 3.535528 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.975955e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.976053e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.976053e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.674536 sec INFO: No Floating Point Exceptions have been reported - 9,391,010,006 cycles # 2.654 GHz - 26,079,707,862 instructions # 2.78 insn per cycle - 3.539600596 seconds time elapsed + 9,357,026,662 cycles # 3.497 GHz + 26,063,941,669 instructions # 2.79 insn per cycle + 2.676039271 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:95901) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.518532e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.518969e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.518969e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL 
: 1.505616 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.467419e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.467877e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.467877e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.183646 sec INFO: No Floating Point Exceptions have been reported - 4,001,150,405 cycles # 2.652 GHz - 9,212,868,850 instructions # 2.30 insn per cycle - 1.509560632 seconds time elapsed + 3,910,407,013 cycles # 3.300 GHz + 9,200,492,907 instructions # 2.35 insn per cycle + 1.185220543 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.985927e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.986486e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.986486e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 
-TOTAL : 1.329709 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.090014e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.090640e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.090640e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.039104 sec INFO: No Floating Point Exceptions have been reported - 3,529,740,112 cycles # 2.648 GHz - 8,168,252,869 instructions # 2.31 insn per cycle - 1.333651402 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) + 3,433,257,963 cycles # 3.300 GHz + 8,155,419,511 instructions # 2.38 insn per cycle + 1.040631977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79383) (512y: 215) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.359545e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.360045e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.360045e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 
-TOTAL : 1.576721 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.836430e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.838249e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.838249e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.600012 sec INFO: No Floating Point Exceptions have been reported - 2,623,702,370 cycles # 1.660 GHz - 4,153,356,804 instructions # 1.58 insn per cycle - 1.580839869 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1493) (512y: 175) (512z:78776) + 1,982,102,437 cycles # 3.297 GHz + 4,144,296,349 instructions # 2.09 insn per cycle + 0.601505064 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1504) (512y: 165) (512z:78775) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 38bc670a18..c79abb2bfe 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:42:48 +DATE: 2024-05-16_14:59:37 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.927387e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.315718e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.634653e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.452451 sec -INFO: No Floating Point Exceptions have been reported - 1,879,085,625 cycles # 2.815 GHz - 2,632,406,951 instructions # 1.40 insn per cycle - 0.724903288 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.675522e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.208336e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.557322e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.534593 sec -INFO: No Floating Point Exceptions have been reported - 2,165,913,457 cycles # 2.812 GHz - 3,139,398,529 instructions # 1.45 insn per cycle - 0.827804422 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.011560e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.033153e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.033153e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.642980 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.386099e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.414360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.414360e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.207395 sec INFO: No Floating Point Exceptions have been reported - 4,710,402,412 cycles # 2.861 GHz - 13,462,495,012 instructions # 2.86 insn per cycle - 1.647108070 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) + 4,225,513,131 cycles # 3.496 GHz + 13,434,518,635 instructions # 3.18 insn per cycle + 1.209109512 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 864) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499484 Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.839775e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.910542e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910542e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
0.913158 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.528838e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.623080e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.623080e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.672337 sec INFO: No Floating Point Exceptions have been reported - 2,620,816,977 cycles # 2.859 GHz - 7,551,970,333 instructions # 2.88 insn per cycle - 0.917276709 seconds time elapsed + 2,355,454,188 cycles # 3.496 GHz + 7,541,752,748 instructions # 3.20 insn per cycle + 0.673997979 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499478 Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.970408e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.156692e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.156692e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.574933 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.526365e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.811448e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.811448e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.385795 sec INFO: No Floating Point Exceptions have been reported - 1,480,758,822 cycles # 2.560 GHz - 3,119,703,419 instructions # 2.11 insn per cycle - 0.579132992 seconds time elapsed + 1,283,679,728 cycles # 3.315 GHz + 3,104,813,956 instructions # 2.42 insn per cycle + 0.387468424 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.428889e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.682851e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.682851e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
0.501020 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.658264e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.962079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.962079e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.376150 sec INFO: No Floating Point Exceptions have been reported - 1,347,520,276 cycles # 2.670 GHz - 2,981,434,055 instructions # 2.21 insn per cycle - 0.505363497 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) + 1,251,723,471 cycles # 3.316 GHz + 2,979,643,067 instructions # 2.38 insn per cycle + 0.377805001 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2745) (512y: 84) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.241546e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.347861e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.347861e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
0.756225 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.340381e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.737920e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.737920e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.331292 sec INFO: No Floating Point Exceptions have been reported - 1,330,320,612 cycles # 1.751 GHz - 1,953,406,018 instructions # 1.47 insn per cycle - 0.760489864 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) + 1,100,815,129 cycles # 3.309 GHz + 1,919,723,094 instructions # 1.74 insn per cycle + 0.332945646 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1365) (512y: 84) (512z: 2171) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 6f141963c0..51d9abbcc0 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,246 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_15:02:27 +DATE: 2024-05-16_15:13:25 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.428295e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.103056e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.103056e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.480636 sec -INFO: No Floating Point Exceptions have been reported - 1,959,891,585 cycles # 2.818 GHz - 2,927,619,706 instructions # 1.49 insn per cycle - 0.752080667 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= -WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.157968e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.371122e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.371122e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.764001 sec -INFO: No Floating Point Exceptions have been reported - 2,873,640,599 cycles # 2.829 GHz - 4,407,079,803 instructions # 1.53 insn per cycle - 1.073816079 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.008642e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.030371e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.030371e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.654853 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.383278e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.411625e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.411625e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.214078 sec INFO: No Floating Point Exceptions have been reported - 4,747,034,662 cycles # 2.862 GHz - 13,469,694,473 instructions # 2.84 insn per cycle - 1.659302078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) + 4,247,198,847 cycles # 3.494 GHz + 13,441,802,925 instructions # 3.16 insn per cycle + 1.216035831 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 864) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499484 Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.820348e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.892211e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.892211e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.930951 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.521394e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.615345e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.615345e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.678956 sec INFO: No Floating Point Exceptions have been reported - 2,665,977,292 cycles # 2.852 GHz - 7,601,998,240 instructions # 2.85 insn per cycle - 0.935555380 seconds time elapsed + 2,376,586,422 cycles # 3.492 GHz + 7,591,894,788 instructions # 3.19 insn per cycle + 0.680920167 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499478 Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.068620e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.272960e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.272960e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.564348 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.507955e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.788891e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.788891e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.392379 sec INFO: No Floating Point Exceptions have been reported - 1,513,664,570 cycles # 2.669 GHz - 3,168,463,518 instructions # 2.09 insn per cycle - 0.568761168 seconds time elapsed + 1,307,070,860 cycles # 3.317 GHz + 3,156,514,594 instructions # 2.41 insn per cycle + 0.394336567 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.408389e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.655047e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.655047e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.511104 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.627192e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.926583e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.926583e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.383626 sec INFO: No Floating Point Exceptions have been reported - 1,377,582,779 cycles # 2.675 GHz - 3,030,644,125 instructions # 2.20 insn per cycle - 0.515560343 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) + 1,278,297,344 cycles # 3.318 GHz + 3,030,841,272 instructions # 2.37 insn per cycle + 0.385587020 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2745) (512y: 84) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.221799e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.329402e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.329402e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.769820 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.313342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.706568e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.706568e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.338091 sec INFO: No Floating Point Exceptions have been reported - 1,366,102,927 cycles # 1.765 GHz - 1,991,071,116 instructions # 1.46 insn per cycle - 0.774386560 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) + 1,125,703,318 cycles # 3.313 GHz + 1,959,620,458 instructions # 1.74 insn per cycle + 0.340051363 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1365) (512y: 84) (512z: 2171) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 25b8d3c885..68fb87c782 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:43:01 +DATE: 2024-05-16_14:59:42 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.907170e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.197971e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.504611e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.455771 sec -INFO: No Floating Point Exceptions have been reported - 1,881,865,516 cycles # 2.813 GHz - 2,669,782,801 instructions # 1.42 insn per cycle - 0.727786761 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.641992e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.081273e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.416654e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.532301 sec -INFO: No Floating Point Exceptions have been reported - 2,167,822,822 cycles # 2.823 GHz - 3,120,353,321 instructions # 1.44 insn per cycle - 0.825343283 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.007784e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.029112e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.029112e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.648995 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.397006e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.425726e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.425726e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.198001 sec INFO: No Floating Point Exceptions have been reported - 4,725,323,359 cycles # 2.860 GHz - 13,457,369,308 instructions # 2.85 insn per cycle - 1.653142214 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) + 4,195,031,738 cycles # 3.498 GHz + 13,428,169,197 instructions # 3.20 insn per cycle + 1.199627126 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 853) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499484 Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.833913e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.904030e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.904030e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
0.915889 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.509751e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.602059e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.602059e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.676983 sec INFO: No Floating Point Exceptions have been reported - 2,628,184,982 cycles # 2.858 GHz - 7,551,273,836 instructions # 2.87 insn per cycle - 0.920086997 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) + 2,371,091,046 cycles # 3.495 GHz + 7,538,133,395 instructions # 3.18 insn per cycle + 0.678626428 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3094) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499478 Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.116183e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.320457e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.320457e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.548674 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.525255e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.811956e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.811956e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.385964 sec INFO: No Floating Point Exceptions have been reported - 1,476,841,675 cycles # 2.675 GHz - 3,117,924,257 instructions # 2.11 insn per cycle - 0.552738607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) + 1,284,306,935 cycles # 3.315 GHz + 3,102,646,201 instructions # 2.42 insn per cycle + 0.387645337 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2901) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.456247e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.706124e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.706124e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
0.496887 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.660507e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.964576e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.964576e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.375829 sec INFO: No Floating Point Exceptions have been reported - 1,340,057,166 cycles # 2.677 GHz - 2,978,732,248 instructions # 2.22 insn per cycle - 0.501058940 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) + 1,250,747,878 cycles # 3.316 GHz + 2,974,193,052 instructions # 2.38 insn per cycle + 0.377484944 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2721) (512y: 84) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.241283e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.347840e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.347840e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
0.757118 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.348464e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.748420e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.748420e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.330689 sec INFO: No Floating Point Exceptions have been reported - 1,329,966,748 cycles # 1.749 GHz - 1,951,787,640 instructions # 1.47 insn per cycle - 0.761356492 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) + 1,100,885,696 cycles # 3.315 GHz + 1,916,709,402 instructions # 1.74 insn per cycle + 0.332395868 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1341) (512y: 84) (512z: 2171) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 88eaa7d80d..a36100034f 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:43:15 +DATE: 2024-05-16_14:59:46 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.867335e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.223690e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.343650e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.450195 sec -INFO: No Floating Point Exceptions have been reported - 1,886,543,936 cycles # 2.814 GHz - 2,627,629,254 instructions # 1.39 insn per cycle - 0.729554150 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.183442e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.842494e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.962990e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.482910 sec -INFO: No Floating Point Exceptions have been reported - 1,994,696,147 cycles # 2.812 GHz - 2,828,466,882 instructions # 1.42 insn per cycle - 0.766894337 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247488790821983 -Relative difference = 0.00036713209996037764 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.069532e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.093791e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.093791e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.553490 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.485753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.520480e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.520480e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 +TOTAL : 1.126850 sec INFO: No Floating Point Exceptions have been reported - 4,455,366,971 cycles # 2.862 GHz - 13,047,769,817 instructions # 2.93 insn per cycle - 1.557630020 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) + 3,940,537,374 cycles # 3.493 GHz + 13,031,883,273 instructions # 3.31 insn per cycle + 1.128447112 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 748) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246857540270419 Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.869084e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.052765e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.052765e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 
0.592151 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.870722e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.117280e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.117280e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945527e+02 +- 1.186198e+02 ) GeV^-2 +TOTAL : 0.446176 sec INFO: No Floating Point Exceptions have been reported - 1,701,146,602 cycles # 2.856 GHz - 4,512,165,265 instructions # 2.65 insn per cycle - 0.596248693 seconds time elapsed + 1,563,614,285 cycles # 3.494 GHz + 4,506,162,514 instructions # 2.88 insn per cycle + 0.447718655 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246859631675157 Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.609679e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.315056e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.315056e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.313413 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.914888e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.990599e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.990599e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 +TOTAL : 0.229557 sec INFO: No Floating Point Exceptions have been reported - 850,737,642 cycles # 2.684 GHz - 1,895,945,890 instructions # 2.23 insn per cycle - 0.317546154 seconds time elapsed + 766,484,742 cycles # 3.320 GHz + 1,884,106,082 instructions # 2.46 insn per cycle + 0.231166023 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.973396e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.785303e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.785303e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.295859 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.880271e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.870021e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.870021e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 +TOTAL : 0.231018 sec INFO: No Floating Point Exceptions have been reported - 801,819,935 cycles # 2.679 GHz - 1,819,229,849 instructions # 2.27 insn per cycle - 0.299944027 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) + 770,973,435 cycles # 3.319 GHz + 1,810,844,093 instructions # 2.35 insn per cycle + 0.232626961 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3390) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.354956e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.770974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.770974e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 
0.400076 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.007666e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.178962e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.178962e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 +TOTAL : 0.185655 sec INFO: No Floating Point Exceptions have been reported - 733,009,701 cycles # 1.817 GHz - 1,304,250,799 instructions # 1.78 insn per cycle - 0.404216975 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) + 619,563,101 cycles # 3.313 GHz + 1,280,713,055 instructions # 2.07 insn per cycle + 0.187266622 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1976) (512y: 8) (512z: 2382) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489383243206 Relative difference = 4.32888033512879e-08 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index b62a8a0309..3975933cab 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,246 +1,188 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_15:02:40 +DATE: 2024-05-16_15:13:30 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.337579e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.030007e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.030007e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 -TOTAL : 0.462122 sec -INFO: No Floating Point Exceptions have been reported - 1,901,719,201 cycles # 2.816 GHz - 2,811,032,752 instructions # 1.48 insn per cycle - 0.731978994 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= -WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.907303e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.566216e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.566216e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 -TOTAL : 0.637098 sec -INFO: No Floating Point Exceptions have been reported - 2,459,040,544 cycles # 2.824 GHz - 3,715,271,980 instructions # 1.51 insn per cycle - 0.927773682 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247488790821983 -Relative difference = 0.00036713209996037764 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.068152e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092782e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092782e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.559403 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.486105e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.520771e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.520771e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 +TOTAL : 1.128596 sec INFO: No Floating Point Exceptions have been reported - 4,475,912,555 cycles # 2.864 GHz - 13,052,235,712 instructions # 2.92 insn per cycle - 1.563691095 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) + 3,949,924,947 cycles # 3.495 GHz + 13,036,365,567 instructions # 3.30 insn per cycle + 1.130311976 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 748) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246857540270419 Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.856394e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.039884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.039884e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.599599 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.876468e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.123023e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.123023e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945527e+02 +- 1.186198e+02 ) GeV^-2 +TOTAL : 0.448714 sec INFO: No Floating Point Exceptions have been reported - 1,723,185,860 cycles # 2.856 GHz - 4,560,285,596 instructions # 2.65 insn per cycle - 0.603925442 seconds time elapsed + 1,571,333,248 cycles # 3.493 GHz + 4,554,717,145 instructions # 2.90 insn per cycle + 0.450463633 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246859631675157 Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.545801e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.241062e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.241062e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.321141 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.868753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.932885e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.932885e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 +TOTAL : 0.233735 sec INFO: No Floating Point Exceptions have been reported - 871,513,310 cycles # 2.683 GHz - 1,932,959,243 instructions # 2.22 insn per cycle - 0.325378385 seconds time elapsed + 781,080,214 cycles # 3.320 GHz + 1,921,555,601 instructions # 2.46 insn per cycle + 0.235573975 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.891127e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.696072e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.696072e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.304221 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.150771e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.214813e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.214813e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 +TOTAL : 0.227020 sec INFO: No Floating Point Exceptions have been reported - 825,995,486 cycles # 2.683 GHz - 1,856,161,781 instructions # 2.25 insn per cycle - 0.308416114 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) + 758,893,869 cycles # 3.321 GHz + 1,847,792,857 instructions # 2.43 insn per cycle + 0.228761763 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3390) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.307702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.720545e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.720545e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.408443 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.952242e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166037e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.166037e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 +TOTAL : 0.191095 sec INFO: No Floating Point Exceptions have been reported - 755,445,387 cycles # 1.833 GHz - 1,345,989,570 instructions # 1.78 insn per cycle - 0.412779323 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) + 638,761,005 cycles # 3.316 GHz + 1,322,785,295 instructions # 2.07 insn per cycle + 0.192912452 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1976) (512y: 8) (512z: 2382) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489383243206 Relative difference = 4.32888033512879e-08 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index f782cd39a5..c30289d39f 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:43:27 +DATE: 2024-05-16_14:59:50 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.882997e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.225822e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.344729e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.447687 sec -INFO: No Floating Point Exceptions have been reported - 1,891,564,072 cycles # 2.816 GHz - 2,660,739,786 instructions # 1.41 insn per cycle - 0.729746219 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.107850e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.805980e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.921999e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.485790 sec -INFO: No Floating Point Exceptions have been reported - 1,996,906,378 cycles # 2.807 GHz - 2,867,667,096 instructions # 1.44 insn per cycle - 0.769333150 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247488790821983 -Relative difference = 0.00036713209996037764 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.069812e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.094168e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.094168e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.553096 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.494286e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.529263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.529263e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 +TOTAL : 1.120381 sec INFO: No Floating Point Exceptions have been reported - 4,454,505,799 cycles # 2.862 GHz - 13,029,391,838 instructions # 2.92 insn per cycle - 1.557292510 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) + 3,920,701,706 cycles # 3.495 GHz + 13,012,702,962 instructions # 3.32 insn per cycle + 1.121977841 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246857540270419 Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.876347e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.060596e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.060596e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 
0.590255 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.864338e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.109744e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.109744e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945527e+02 +- 1.186198e+02 ) GeV^-2 +TOTAL : 0.446774 sec INFO: No Floating Point Exceptions have been reported - 1,693,495,983 cycles # 2.852 GHz - 4,508,141,451 instructions # 2.66 insn per cycle - 0.594398488 seconds time elapsed + 1,565,647,947 cycles # 3.494 GHz + 4,502,366,916 instructions # 2.88 insn per cycle + 0.448316706 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3588) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246859631675157 Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.574680e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.273652e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.273652e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.315080 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.941053e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.024320e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.024320e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 +TOTAL : 0.228791 sec INFO: No Floating Point Exceptions have been reported - 851,359,645 cycles # 2.672 GHz - 1,893,112,803 instructions # 2.22 insn per cycle - 0.319204462 seconds time elapsed + 763,896,479 cycles # 3.320 GHz + 1,881,145,730 instructions # 2.46 insn per cycle + 0.230398503 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.978403e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.785893e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.785893e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.295093 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.226785e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.314429e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.314429e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 +TOTAL : 0.222236 sec INFO: No Floating Point Exceptions have been reported - 799,712,323 cycles # 2.678 GHz - 1,814,979,638 instructions # 2.27 insn per cycle - 0.299228201 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) + 742,272,381 cycles # 3.320 GHz + 1,806,863,379 instructions # 2.43 insn per cycle + 0.223884202 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3353) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.317992e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.737735e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.737735e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 
0.402763 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.007631e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.177487e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.177487e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 +TOTAL : 0.185723 sec INFO: No Floating Point Exceptions have been reported - 736,511,578 cycles # 1.812 GHz - 1,302,115,541 instructions # 1.77 insn per cycle - 0.406867415 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1936) (512y: 32) (512z: 2382) + 619,200,970 cycles # 3.309 GHz + 1,278,602,814 instructions # 2.06 insn per cycle + 0.187418956 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1939) (512y: 8) (512z: 2382) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489383243206 Relative difference = 4.32888033512879e-08 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 265a4a7626..3a946024f8 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:43:39 +DATE: 2024-05-16_14:59:53 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.940149e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.336219e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.662963e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.452840 sec -INFO: No Floating Point Exceptions have been reported - 1,880,363,198 cycles # 2.808 GHz - 2,677,692,820 instructions # 1.42 insn per cycle - 0.726161506 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.684159e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.236315e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.588311e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.530728 sec -INFO: No Floating Point Exceptions have been reported - 2,164,642,485 cycles # 2.821 GHz - 3,145,530,012 instructions # 1.45 insn per cycle - 0.824333778 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482577104625 -Relative difference = 5.209967070245855e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.003476e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.024445e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.024445e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.655769 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.382158e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.410400e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.410400e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.210878 sec INFO: No Floating Point Exceptions have been reported - 4,745,491,139 cycles # 2.860 GHz - 13,466,039,366 instructions # 2.84 insn per cycle - 1.659848552 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) + 4,238,290,901 cycles # 3.496 GHz + 13,408,111,909 instructions # 3.16 insn per cycle + 1.212529715 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 836) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.849332e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.920343e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.920343e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.908133 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.561388e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.658011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.658011e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.664044 sec INFO: No Floating Point Exceptions have been reported - 2,605,721,632 cycles # 2.858 GHz - 7,384,650,569 instructions # 2.83 insn per cycle - 0.912227813 seconds time elapsed + 2,325,909,827 cycles # 3.495 GHz + 7,374,349,981 instructions # 3.17 insn per cycle + 0.665822359 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.133010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.340359e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.340359e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.546033 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.607029e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.900618e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.900618e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.379426 sec INFO: No Floating Point Exceptions have been reported - 1,469,888,298 cycles # 2.674 GHz - 3,055,461,884 instructions # 2.08 insn per cycle - 0.550169150 seconds time elapsed + 1,262,584,228 cycles # 3.316 GHz + 3,036,424,627 instructions # 2.40 insn per cycle + 0.381094498 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.544324e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.807645e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.807645e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.485213 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.781169e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.102295e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.102295e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.367326 sec INFO: No Floating Point Exceptions have been reported - 1,307,959,720 cycles # 2.676 GHz - 2,930,377,532 instructions # 2.24 insn per cycle - 0.489382978 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) + 1,222,602,925 cycles # 3.315 GHz + 2,924,042,140 instructions # 2.39 insn per cycle + 0.368965475 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2850) (512y: 90) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.172350e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.272043e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.272043e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.779592 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.115607e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.478719e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.478719e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.344683 sec INFO: No Floating Point Exceptions have been reported - 1,368,592,699 cycles # 1.747 GHz - 1,969,378,714 instructions # 1.44 insn per cycle - 0.783958712 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) + 1,147,285,653 cycles # 3.314 GHz + 1,931,282,298 instructions # 1.68 insn per cycle + 0.346427078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1693) (512y: 92) (512z: 2169) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 84e80111cc..095b604bbf 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:43:53 +DATE: 2024-05-16_14:59:58 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.890956e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.181054e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.513059e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.453973 sec -INFO: No Floating Point Exceptions have been reported - 1,876,167,670 cycles # 2.808 GHz - 2,662,885,558 instructions # 1.42 insn per cycle - 0.726739496 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.642147e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.081360e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.416296e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.533908 sec -INFO: No Floating Point Exceptions have been reported - 2,163,893,097 cycles # 2.818 GHz - 3,132,561,280 instructions # 1.45 insn per cycle - 0.826852700 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482577104625 -Relative difference = 5.209967070245855e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.007176e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.028375e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.028375e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.650928 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.382537e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.410702e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.410702e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.210256 sec INFO: No Floating Point Exceptions have been reported - 4,733,031,285 cycles # 2.861 GHz - 13,451,191,160 instructions # 2.84 insn per cycle - 1.655053441 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) + 4,238,104,879 cycles # 3.498 GHz + 13,396,938,147 instructions # 3.16 insn per cycle + 1.211923293 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 826) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.847760e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.919370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.919370e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.909156 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.560416e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.657071e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.657071e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.664319 sec INFO: No Floating Point Exceptions have been reported - 2,606,818,939 cycles # 2.857 GHz - 7,388,977,556 instructions # 2.83 insn per cycle - 0.913243210 seconds time elapsed + 2,325,818,465 cycles # 3.494 GHz + 7,370,246,475 instructions # 3.17 insn per cycle + 0.666036769 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.915489e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.093943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.093943e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.585235 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.610639e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.904142e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.904142e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.378982 sec INFO: No Floating Point Exceptions have been reported - 1,469,957,671 cycles # 2.496 GHz - 3,055,084,256 instructions # 2.08 insn per cycle - 0.589443028 seconds time elapsed + 1,261,007,643 cycles # 3.316 GHz + 3,032,344,059 instructions # 2.40 insn per cycle + 0.380605351 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.535422e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.797003e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.797003e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.486063 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.784483e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.105789e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.105789e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.366888 sec INFO: No Floating Point Exceptions have been reported - 1,306,700,125 cycles # 2.669 GHz - 2,930,583,524 instructions # 2.24 insn per cycle - 0.490171496 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) + 1,221,624,837 cycles # 3.317 GHz + 2,920,392,755 instructions # 2.39 insn per cycle + 0.368559603 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2826) (512y: 90) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.173668e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.273111e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.273111e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.778991 
sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.106556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.468206e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.468206e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.345213 sec INFO: No Floating Point Exceptions have been reported - 1,367,910,665 cycles # 1.749 GHz - 1,969,371,455 instructions # 1.44 insn per cycle - 0.783143035 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) + 1,146,891,573 cycles # 3.308 GHz + 1,929,354,616 instructions # 1.68 insn per cycle + 0.346978661 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1669) (512y: 92) (512z: 2169) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 8af6873425..f2b066352d 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:20:33 +DATE: 2024-05-16_15:27:03 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.588343e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.081541e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.176224e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.528808 sec -INFO: No Floating Point Exceptions have been reported - 2,192,111,166 cycles # 2.821 GHz - 3,135,008,318 instructions # 1.43 insn per cycle - 0.833908791 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110280 -Relative difference = 2.1036162329561614e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.865233e+05 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.915227e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.915227e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 5.734356 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.552827e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.615371e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.615371e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.237983 sec INFO: No Floating Point Exceptions have been reported - 16,430,057,220 cycles # 2.863 GHz - 42,484,854,801 instructions # 2.59 insn per cycle - 5.739849036 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 711) (avx2: 0) (512y: 0) (512z: 0) + 14,834,141,883 cycles # 3.498 GHz + 42,358,662,682 instructions # 2.86 insn per cycle + 4.240692019 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 715) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.235376e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.401567e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.401567e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
3.353450 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.429207e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.635680e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.635680e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.495408 sec INFO: No Floating Point Exceptions have been reported - 9,612,345,009 cycles # 2.863 GHz - 26,317,248,003 instructions # 2.74 insn per cycle - 3.358813940 seconds time elapsed + 8,733,845,375 cycles # 3.496 GHz + 26,311,106,876 instructions # 3.01 insn per cycle + 2.498254604 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2388) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.244474e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.678972e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.678972e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
2.111219 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.512447e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.100995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.100995e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.524145 sec INFO: No Floating Point Exceptions have been reported - 5,673,148,574 cycles # 2.682 GHz - 12,029,125,150 instructions # 2.12 insn per cycle - 2.116589548 seconds time elapsed + 5,082,843,549 cycles # 3.329 GHz + 12,004,216,763 instructions # 2.36 insn per cycle + 1.527047583 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2532) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.759844e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.282682e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.282682e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
1.934603 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.691238e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.313521e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.313521e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.488661 sec INFO: No Floating Point Exceptions have been reported - 5,185,525,755 cycles # 2.675 GHz - 11,158,849,555 instructions # 2.15 insn per cycle - 1.940086470 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2195) (512y: 148) (512z: 0) + 4,964,913,708 cycles # 3.329 GHz + 11,230,782,048 instructions # 2.26 insn per cycle + 1.491517590 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2246) (512y: 128) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.492671e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.676216e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.676216e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
3.114794 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.995948e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.646934e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.646934e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.436277 sec INFO: No Floating Point Exceptions have been reported - 5,530,850,143 cycles # 1.773 GHz - 8,071,834,418 instructions # 1.46 insn per cycle - 3.120392658 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1471) (512y: 129) (512z: 1684) + 4,789,679,060 cycles # 3.329 GHz + 7,949,916,272 instructions # 1.66 insn per cycle + 1.439141121 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1464) (512y: 107) (512z: 1682) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 746b04ecac..d2f36f71b3 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:20:58 +DATE: 2024-05-16_15:27:16 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.594523e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092654e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.188255e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.533303 sec -INFO: No Floating Point Exceptions have been reported - 2,159,610,833 cycles # 2.816 GHz - 3,095,961,302 instructions # 1.43 insn per cycle - 0.825364511 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110280 -Relative difference = 2.1036162329561614e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.884407e+05 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.935333e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.935333e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 5.676327 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.535565e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.597334e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.597334e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.265484 sec INFO: No Floating Point Exceptions have been reported - 16,262,813,557 cycles # 2.863 GHz - 43,266,807,177 instructions # 2.66 insn per cycle - 5.681729392 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 662) (avx2: 0) (512y: 0) (512z: 0) + 14,925,074,351 cycles # 3.497 GHz + 43,040,455,238 instructions # 2.88 insn per cycle + 4.268333311 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 661) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.290556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.463505e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.463505e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
3.298765 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.582132e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.803494e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.803494e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.416903 sec INFO: No Floating Point Exceptions have been reported - 9,454,937,516 cycles # 2.862 GHz - 25,430,832,847 instructions # 2.69 insn per cycle - 3.304226277 seconds time elapsed + 8,463,102,793 cycles # 3.498 GHz + 25,423,102,317 instructions # 3.00 insn per cycle + 2.419752177 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2268) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.695348e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.042916e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.042916e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
2.345615 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.828940e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.313322e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.313322e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.664399 sec INFO: No Floating Point Exceptions have been reported - 6,296,882,273 cycles # 2.679 GHz - 13,638,682,807 instructions # 2.17 insn per cycle - 2.351107442 seconds time elapsed + 5,543,617,863 cycles # 3.326 GHz + 13,614,779,414 instructions # 2.46 insn per cycle + 1.667241581 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2629) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.910957e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.286382e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.286382e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
2.246822 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.990714e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.496407e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.496407e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.626291 sec INFO: No Floating Point Exceptions have been reported - 6,026,491,701 cycles # 2.677 GHz - 12,722,860,113 instructions # 2.11 insn per cycle - 2.252413644 seconds time elapsed + 5,418,315,687 cycles # 3.327 GHz + 12,664,280,464 instructions # 2.34 insn per cycle + 1.629124739 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2146) (512y: 296) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.420299e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.596534e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.596534e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
3.177504 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.106458e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.780667e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.780667e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.418617 sec INFO: No Floating Point Exceptions have been reported - 5,627,100,070 cycles # 1.769 GHz - 8,928,441,764 instructions # 1.59 insn per cycle - 3.183062200 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1357) (512y: 171) (512z: 1777) + 4,731,424,271 cycles # 3.329 GHz + 8,833,879,203 instructions # 1.87 insn per cycle + 1.421431673 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1353) (512y: 160) (512z: 1776) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index a9079e9716..64cd5aa370 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,102 +1,71 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:21:23 +DATE: 2024-05-16_15:27:28 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.566221e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.504693e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.775023e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.485925 sec -INFO: No Floating Point Exceptions have been reported - 2,003,287,538 cycles # 2.816 GHz - 2,880,414,118 instructions # 1.44 insn per cycle - 0.769648039 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313490e+00 -Avg ME (F77/GPU) = 4.3136695463908836 -Relative difference = 4.162439020000051e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.938364e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.994818e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.994818e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 5.499884 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.703429e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.782201e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.782201e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 3.995435 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 15,743,516,639 cycles # 2.861 GHz - 42,225,863,593 instructions # 2.68 insn per cycle - 5.505101290 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 601) (avx2: 0) (512y: 0) (512z: 0) + 13,985,040,470 cycles # 3.498 GHz + 42,109,829,772 instructions # 3.01 insn per cycle + 3.998021323 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 604) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 @@ -104,29 +73,29 @@ Avg ME (F77/C++) = 4.3135739049175754 Relative difference = 2.2042608890083832e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.494085e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.834702e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.834702e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.423560 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.001795e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.414096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.414096e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315913e+00 +- 1.953828e-01 ) GeV^0 +TOTAL : 1.861772 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,948,197,620 cycles # 2.861 GHz - 16,919,710,710 instructions # 2.44 insn per cycle - 2.428887408 seconds time elapsed + 6,514,230,564 cycles # 3.495 GHz + 16,926,292,213 instructions # 2.60 insn per cycle + 1.864397069 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2983) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 @@ -134,29 +103,29 @@ Avg ME (F77/C++) = 4.3135722205042839 Relative difference = 5.111872113533787e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.820914e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 8.816967e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.816967e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.429543 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.100363e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.229661e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.229661e+06 ) sec^-1 +MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 1.062731 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,855,960,900 cycles # 2.689 GHz - 7,989,689,028 instructions # 2.07 insn per cycle - 1.434693752 seconds time elapsed + 3,552,382,042 cycles # 3.335 GHz + 7,970,082,680 instructions # 2.24 insn per cycle + 1.065333882 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -164,29 +133,29 @@ Avg ME (F77/C++) = 4.3135645699221641 Relative difference = 9.97035713074993e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.282128e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 9.407558e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.407558e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.355217 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.126084e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.261376e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.261376e+06 ) sec^-1 +MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 1.042775 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,662,603,190 cycles # 2.693 GHz - 7,491,885,625 instructions # 2.05 insn per cycle - 1.360533114 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3036) (512y: 23) (512z: 0) + 3,486,677,745 cycles # 3.336 GHz + 7,511,921,838 instructions # 2.15 insn per cycle + 1.045365922 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3091) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -194,29 +163,29 @@ Avg ME (F77/C++) = 4.3135645699221641 Relative difference = 9.97035713074993e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.072932e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.653576e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.653576e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.816585 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.331577e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.530592e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.530592e+06 ) sec^-1 +MeanMatrixElemValue = ( 7.289196e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 0.899900 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,322,287,385 cycles # 1.825 GHz - 5,988,754,595 instructions # 1.80 insn per cycle - 1.821834164 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2418) (512y: 32) (512z: 2031) + 3,008,871,631 cycles # 3.335 GHz + 5,914,260,675 instructions # 1.97 insn per cycle + 0.902557879 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2421) (512y: 8) (512z: 2031) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 0359df7b77..7b8532ad59 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,102 +1,71 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:21:44 +DATE: 2024-05-16_15:27:39 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.575897e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.505600e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.778243e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.488373 sec -INFO: No Floating Point Exceptions have been reported - 2,007,752,645 cycles # 2.812 GHz - 2,828,437,251 instructions # 1.41 insn per cycle - 0.772837040 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313490e+00 -Avg ME (F77/GPU) = 4.3136695463908836 -Relative difference = 4.162439020000051e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.991117e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.050649e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.050649e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 5.356246 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.800498e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.885498e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.885498e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 3.860758 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 15,339,535,429 cycles # 2.862 GHz - 42,474,905,629 instructions # 2.77 insn per cycle - 5.361339903 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 559) (avx2: 0) (512y: 0) (512z: 0) + 13,507,580,954 cycles # 3.497 GHz + 42,358,644,316 instructions # 3.14 insn per cycle + 3.863318185 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 560) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 @@ -104,29 +73,29 @@ Avg ME (F77/C++) = 4.3135739491553977 Relative difference = 1.1787117204016727e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.134209e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.583662e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.583662e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.132369 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.356612e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.985601e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.985601e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315913e+00 +- 1.953828e-01 ) GeV^0 +TOTAL : 1.540920 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,119,263,046 cycles # 2.864 GHz - 16,261,701,502 instructions # 2.66 insn per cycle - 2.137647028 seconds time elapsed + 5,396,516,030 cycles # 3.497 GHz + 16,268,270,688 instructions # 3.01 insn per cycle + 1.543527679 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2702) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 @@ -134,29 +103,29 @@ Avg ME (F77/C++) = 4.3135722205042839 Relative difference = 5.111872113533787e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.498649e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.173623e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.173623e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.703269 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.346896e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.027143e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.027143e+06 ) sec^-1 +MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 1.232243 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,581,699,390 cycles # 2.683 GHz - 9,041,394,873 instructions # 1.97 insn per cycle - 1.708700782 seconds time elapsed + 4,112,861,248 cycles # 3.331 GHz + 9,021,964,660 instructions # 2.19 insn per cycle + 1.234913176 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3558) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -164,29 +133,29 @@ Avg ME (F77/C++) = 4.3135645687580109 Relative difference = 9.997345323075056e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.705142e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.424759e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.424759e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.652652 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.478929e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.042114e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.042114e+06 ) sec^-1 +MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 1.217923 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,411,023,052 cycles # 2.662 GHz - 8,532,140,610 instructions # 1.93 insn per cycle - 1.658018216 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3311) (512y: 10) (512z: 0) + 4,064,285,992 cycles # 3.331 GHz + 8,513,710,724 instructions # 2.09 insn per cycle + 1.220504425 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3337) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -194,29 +163,29 @@ Avg ME (F77/C++) = 4.3135645687580109 Relative difference = 9.997345323075056e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.118773e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.709641e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.709641e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.803301 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.319130e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.517145e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.517145e+06 ) sec^-1 +MeanMatrixElemValue = ( 7.289196e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 0.906747 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,302,699,675 cycles # 1.827 GHz - 5,958,419,273 instructions # 1.80 insn per cycle - 1.808538430 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 32) (512z: 2014) + 3,034,187,177 cycles # 3.338 GHz + 5,883,924,547 instructions # 1.94 insn per cycle + 0.909399222 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 8) (512z: 2014) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 4345b3c851..06fcaedc6d 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,102 +1,71 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:22:04 +DATE: 2024-05-16_15:27:49 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.596790e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.087710e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.182609e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.531791 sec -INFO: No Floating Point Exceptions have been reported - 2,158,479,665 cycles # 2.816 GHz - 3,115,947,911 instructions # 1.44 insn per cycle - 0.824595914 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134711012809239 -Relative difference = 2.0835166567625394e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.739846e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.783362e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.783362e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.139062 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.375607e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.429535e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.429535e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.543809 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 17,579,172,412 cycles # 2.862 GHz - 41,767,715,738 instructions # 2.38 insn per cycle - 6.144566394 seconds time elapsed + 15,901,263,136 cycles # 3.498 GHz + 41,771,711,123 instructions # 2.63 insn per cycle + 4.546568218 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -104,29 +73,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.944235e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.080846e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.080846e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.674671 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.231502e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.418747e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.418747e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.606220 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 10,157,870,701 cycles # 2.761 GHz - 26,355,211,403 instructions # 2.59 insn per cycle - 3.680088821 seconds time elapsed + 9,126,872,302 cycles # 3.499 GHz + 26,345,829,381 instructions # 2.89 insn per cycle + 2.609067827 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2438) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -134,29 +103,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.512494e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.830362e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.830362e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.435332 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.477118e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.909959e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.909959e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.747382 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,512,604,303 cycles # 2.669 GHz - 12,120,159,732 instructions # 1.86 insn per cycle - 2.440902409 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2718) (512y: 0) (512z: 0) + 5,818,706,514 cycles # 3.325 GHz + 12,039,889,556 instructions # 2.07 insn per cycle + 1.750186275 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2708) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -164,29 +133,29 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.920988e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.300442e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.300442e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.244169 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.659622e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.115827e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.115827e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.699288 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,018,583,564 cycles # 2.676 GHz - 11,228,279,694 instructions # 1.87 insn per cycle - 2.249711111 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2369) (512y: 150) (512z: 0) + 5,659,668,657 cycles # 3.326 GHz + 11,266,150,406 instructions # 1.99 insn per cycle + 1.702085699 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2425) (512y: 130) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -194,29 +163,29 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.148571e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.297302e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.297302e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.442171 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.230568e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.765472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.765472e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.575086 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,072,730,798 cycles # 1.762 GHz - 8,215,005,190 instructions # 1.35 insn per cycle - 3.447734816 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1787) (512y: 134) (512z: 1755) + 5,245,552,991 cycles # 3.325 GHz + 8,052,851,902 instructions # 1.54 insn per cycle + 1.577873671 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1780) (512y: 112) (512z: 1753) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index fc67fec042..480631cabb 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,102 +1,71 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:22:31 +DATE: 2024-05-16_15:28:02 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.615689e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.096145e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.193163e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.527662 sec -INFO: No Floating Point Exceptions have been reported - 2,187,091,067 cycles # 2.822 GHz - 3,143,599,790 instructions # 1.44 insn per cycle - 0.831715891 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134711012809239 -Relative difference = 2.0835166567625394e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.750132e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.794255e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.794255e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.103500 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.347178e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.400187e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.400187e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.596559 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 17,473,867,626 cycles # 2.861 GHz - 43,052,630,037 instructions # 2.46 insn per cycle - 6.108967949 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 651) (avx2: 0) (512y: 0) (512z: 0) + 16,087,359,617 cycles # 3.498 GHz + 43,040,598,944 instructions # 2.68 insn per cycle + 4.599289395 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -104,29 +73,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.176372e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.336517e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.336517e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.414423 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.411626e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.615612e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.615612e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.505649 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,783,940,024 cycles # 2.862 GHz - 25,167,910,576 instructions # 2.57 insn per cycle - 3.420037518 seconds time elapsed + 8,772,065,424 cycles # 3.497 GHz + 25,160,170,266 instructions # 2.87 insn per cycle + 2.508503600 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -134,29 +103,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.178030e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.451835e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.451835e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.622185 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.114431e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.500436e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.500436e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.844369 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,019,924,583 cycles # 2.672 GHz - 12,790,606,448 instructions # 1.82 insn per cycle - 2.627804246 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2699) (512y: 0) (512z: 0) + 6,139,158,659 cycles # 3.324 GHz + 12,731,086,383 instructions # 2.07 insn per cycle + 1.847184447 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2709) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -164,29 +133,29 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.488078e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.801083e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.801083e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.447720 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.276022e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.678921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.678921e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.796889 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,546,937,322 cycles # 2.670 GHz - 12,109,881,739 instructions # 1.85 insn per cycle - 2.453162643 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2351) (512y: 227) (512z: 0) + 5,981,917,485 cycles # 3.324 GHz + 12,063,062,925 instructions # 2.02 insn per cycle + 1.799731034 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2379) (512y: 217) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -194,29 +163,29 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.983756e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.117708e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.117708e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.627336 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.011666e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.510292e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.510292e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.621074 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,377,996,877 cycles # 1.756 GHz - 8,984,744,450 instructions # 1.41 insn per cycle - 3.632964633 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1892) (512y: 178) (512z: 2083) + 5,396,377,266 cycles # 3.324 GHz + 8,870,172,709 instructions # 1.64 insn per cycle + 1.623924011 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 157) (512z: 2081) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index f2a95b68c4..6dfec42160 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:19:32 +DATE: 2024-05-16_15:26:51 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.205899e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.229515e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.233614e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.467479 sec -INFO: No Floating Point Exceptions have been reported - 1,929,394,895 cycles # 2.809 GHz - 2,774,653,842 instructions # 1.44 insn per cycle - 0.745241861 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.854750e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.994181e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.003911e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.485365 sec -INFO: No Floating Point Exceptions have been reported - 1,990,830,698 cycles # 2.816 GHz - 2,942,277,354 instructions # 1.48 insn per cycle - 0.765598417 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176604E-006 -Relative difference = 3.3392753366481633e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.339413e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.342602e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.342602e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.163415 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.523683e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.527922e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.527922e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.119146 sec INFO: No Floating Point Exceptions have been reported - 474,956,853 cycles # 2.847 GHz - 1,396,923,375 instructions # 2.94 insn per cycle - 0.167372542 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3991) (avx2: 0) (512y: 0) (512z: 0) + 418,376,951 cycles # 3.478 GHz + 1,390,302,519 instructions # 3.32 insn per cycle + 0.120575892 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3971) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.350685e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.362490e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.362490e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.088282 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.956577e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.973214e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.973214e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.061242 sec INFO: No Floating Point Exceptions have been reported - 246,129,842 cycles # 2.680 GHz - 699,160,574 instructions # 2.84 insn per cycle - 0.092454839 seconds time elapsed + 212,720,705 cycles # 3.409 GHz + 693,825,670 instructions # 3.26 insn per cycle + 0.062675749 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9501) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.421076e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.426847e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.426847e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.042262 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.011044e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.018814e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.018814e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.028471 sec INFO: No Floating Point Exceptions have been reported - 120,513,094 cycles # 2.641 GHz - 260,079,134 instructions # 2.16 insn per cycle - 0.046206481 seconds time elapsed + 98,055,757 cycles # 3.306 GHz + 254,719,428 instructions # 2.60 insn per cycle + 0.029885593 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8227) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.614262e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.622122e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.622122e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037855 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.229563e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.239103e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.239103e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.025969 sec INFO: No Floating Point Exceptions have been reported - 109,022,775 cycles # 2.645 GHz - 240,308,972 instructions # 2.20 insn per cycle - 0.041904895 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7348) (512y: 150) (512z: 0) + 89,725,190 cycles # 3.307 GHz + 235,372,197 instructions # 2.62 insn per cycle + 0.027374685 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7399) (512y: 130) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.170349e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.175260e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.175260e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.050252 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.319557e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.341463e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.341463e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.018202 sec INFO: No Floating Point Exceptions have been reported - 96,595,554 cycles # 1.802 GHz - 138,452,128 instructions # 1.43 insn per cycle - 0.054148545 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1692) (512y: 126) (512z: 6592) + 62,033,907 cycles # 3.208 GHz + 132,765,845 instructions # 2.14 insn per cycle + 0.019577536 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1685) (512y: 104) (512z: 6590) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index ca894b0a6d..485330b742 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:19:42 +DATE: 2024-05-16_15:26:53 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.237277e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.263102e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.267367e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.467317 sec -INFO: No Floating Point Exceptions have been reported - 1,933,877,717 cycles # 2.813 GHz - 2,829,779,417 instructions # 1.46 insn per cycle - 0.746133258 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.945887e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.087010e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.096853e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.483889 sec -INFO: No Floating Point Exceptions have been reported - 2,005,783,112 cycles # 2.816 GHz - 2,927,359,248 instructions # 1.46 insn per cycle - 0.768925329 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176604E-006 -Relative difference = 3.3392753366481633e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.344408e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.347652e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.347652e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.162339 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.504525e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.508775e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.508775e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.119069 sec INFO: No Floating Point Exceptions have been reported - 471,806,818 cycles # 2.848 GHz - 1,391,948,601 instructions # 2.95 insn per cycle - 0.166295977 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3869) (avx2: 0) (512y: 0) (512z: 0) + 417,500,473 cycles # 3.472 GHz + 1,384,122,736 instructions # 3.32 insn per cycle + 0.120521598 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3879) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.367799e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.379601e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.379601e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.087176 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.869441e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.885004e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.885004e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.061247 sec INFO: No Floating Point Exceptions have been reported - 243,999,829 cycles # 2.694 GHz - 695,186,413 instructions # 2.85 insn per cycle - 0.091139423 seconds time elapsed + 211,782,117 cycles # 3.393 GHz + 688,974,542 instructions # 3.25 insn per cycle + 0.062692430 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9537) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.395387e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.400899e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.400899e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.042187 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.994571e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.002133e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.002133e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.028171 sec INFO: No Floating Point Exceptions have been reported - 119,801,052 cycles # 2.624 GHz - 255,741,591 instructions # 2.13 insn per cycle - 0.046174431 seconds time elapsed + 97,037,553 cycles # 3.306 GHz + 249,900,445 instructions # 2.58 insn per cycle + 0.029580809 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8181) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.613988e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.621406e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.621406e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037041 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.239951e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.249596e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.249596e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.025255 sec INFO: No Floating Point Exceptions have been reported - 106,534,081 cycles # 2.639 GHz - 235,917,118 instructions # 2.21 insn per cycle - 0.041041363 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7301) (512y: 150) (512z: 0) + 87,483,215 cycles # 3.307 GHz + 230,588,362 instructions # 2.64 insn per cycle + 0.026694294 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7352) (512y: 130) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.167962e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.172897e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.172897e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.049548 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.208887e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.229075e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.229075e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.018222 sec INFO: No Floating Point Exceptions have been reported - 94,554,513 cycles # 1.786 GHz - 133,899,064 instructions # 1.42 insn per cycle - 0.053428613 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1641) (512y: 126) (512z: 6597) + 61,027,044 cycles # 3.148 GHz + 127,983,571 instructions # 2.10 insn per cycle + 0.019612963 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1634) (512y: 104) (512z: 6595) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index f86e27869e..552a5148b4 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:19:53 +DATE: 2024-05-16_15:26:55 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.541598e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.553658e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.556693e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 -TOTAL : 0.467629 sec -INFO: No Floating Point Exceptions have been reported - 1,964,166,954 cycles # 2.815 GHz - 2,823,406,286 instructions # 1.44 insn per cycle - 0.754117473 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.614317e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.731134e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.742615e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.020493e-03 +- 4.025604e-03 ) GeV^-4 -TOTAL : 0.468434 sec -INFO: No Floating Point Exceptions have been reported - 1,946,164,211 cycles # 2.817 GHz - 2,847,399,547 instructions # 1.46 insn per cycle - 0.748191861 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127250e-06 -Avg ME (F77/GPU) = 8.1272870954487585E-006 -Relative difference = 4.564329725014175e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no 
SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.448019e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.451516e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.451516e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.158474 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.594807e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.599355e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.599355e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 +TOTAL : 0.117292 sec INFO: No Floating Point Exceptions have been reported - 461,638,972 cycles # 2.852 GHz - 1,393,493,000 instructions # 3.02 insn per cycle - 0.162490485 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3070) (avx2: 0) (512y: 0) (512z: 0) + 411,053,000 cycles # 3.469 GHz + 1,388,257,678 instructions # 3.38 insn per cycle + 0.118765314 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105211728276E-006 Relative difference = 5.891219330978222e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.201120e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.205395e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.205395e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.048902 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.711156e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.717425e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.717425e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 +TOTAL : 0.033054 sec INFO: No Floating Point Exceptions have been reported - 138,099,810 cycles # 2.644 GHz - 375,723,801 instructions # 2.72 insn per cycle - 0.052805368 seconds time elapsed + 115,868,477 cycles # 3.385 GHz + 370,507,638 instructions # 3.20 insn per cycle + 0.034519034 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:10134) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.699468e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.721720e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.721720e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.024500 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.972394e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.005793e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.005793e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.015435 sec INFO: No Floating Point Exceptions have been reported - 72,431,086 cycles # 2.595 GHz - 146,734,646 instructions # 2.03 insn per cycle - 0.028413255 seconds time elapsed + 54,887,348 cycles # 3.297 GHz + 141,404,697 instructions # 2.58 insn per cycle + 0.016831180 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8933) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.950281e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.979563e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.979563e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.023199 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.344012e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.385737e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.385737e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.014294 sec INFO: No Floating Point Exceptions have been reported - 67,511,576 cycles # 2.517 GHz - 136,466,222 instructions # 2.02 insn per cycle - 0.027372188 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8164) (512y: 28) (512z: 0) + 51,391,104 cycles # 3.292 GHz + 131,448,490 instructions # 2.56 insn per cycle + 0.015740752 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8219) (512y: 8) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.260359e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.280493e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.280493e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.028479 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.373023e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.462095e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.462095e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.010501 sec INFO: No Floating Point Exceptions have been reported - 59,124,236 cycles # 1.860 GHz - 85,286,285 instructions # 1.44 insn per cycle - 0.032355670 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2572) (512y: 32) (512z: 6935) + 37,353,295 cycles # 3.150 GHz + 79,834,868 instructions # 2.14 insn per cycle + 0.012002265 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2575) (512y: 8) (512z: 6935) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 2af7dd76f9..07a4a566fa 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:20:03 +DATE: 2024-05-16_15:26:57 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.561126e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.572400e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.575387e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 -TOTAL : 0.469592 sec -INFO: No Floating Point Exceptions have been reported - 1,933,901,131 cycles # 2.816 GHz - 2,803,636,036 instructions # 1.45 insn per cycle - 0.744726293 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.901730e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.003706e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005157e+06 ) sec^-1 -MeanMatrixElemValue = ( 8.020495e-03 +- 4.025606e-03 ) GeV^-4 -TOTAL : 0.471774 sec -INFO: No Floating Point Exceptions have been reported - 1,934,886,385 cycles # 2.815 GHz - 2,830,776,229 instructions # 1.46 insn per cycle - 0.746474254 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127250e-06 -Avg ME (F77/GPU) = 8.1272870252982758E-006 -Relative difference = 4.555698209723637e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no 
SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.452227e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.455705e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.455705e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.157329 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.609563e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.614104e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.614104e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 +TOTAL : 0.116376 sec INFO: No Floating Point Exceptions have been reported - 458,573,657 cycles # 2.854 GHz - 1,388,574,447 instructions # 3.03 insn per cycle - 0.161242660 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2959) (avx2: 0) (512y: 0) (512z: 0) + 409,017,056 cycles # 3.478 GHz + 1,383,259,571 instructions # 3.38 insn per cycle + 0.117820263 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2946) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105211728276E-006 Relative difference = 5.891219330978222e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.204538e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.208976e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.208976e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.047932 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.706065e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.712338e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.712338e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 +TOTAL : 0.032671 sec INFO: No Floating Point Exceptions have been reported - 136,097,535 cycles # 2.652 GHz - 371,027,952 instructions # 2.73 insn per cycle - 0.051946079 seconds time elapsed + 114,247,893 cycles # 3.374 GHz + 365,724,809 instructions # 3.20 insn per cycle + 0.034093739 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:10117) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.559391e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.580217e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.580217e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.024960 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.912469e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.944750e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.944750e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.015078 sec INFO: No Floating Point Exceptions have been reported - 71,167,021 cycles # 2.517 GHz - 142,031,155 instructions # 2.00 insn per cycle - 0.028974311 seconds time elapsed + 53,850,273 cycles # 3.309 GHz + 136,539,794 instructions # 2.54 insn per cycle + 0.016472996 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8887) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.102195e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.131341e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.131341e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.021142 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.348789e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.388103e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.388103e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.013707 sec INFO: No Floating Point Exceptions have been reported - 63,906,261 cycles # 2.611 GHz - 131,729,034 instructions # 2.06 insn per cycle - 0.025029577 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8117) (512y: 28) (512z: 0) + 49,526,307 cycles # 3.303 GHz + 126,638,085 instructions # 2.56 insn per cycle + 0.015125513 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8172) (512y: 8) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.321655e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.342179e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.342179e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.027241 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.362894e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.452373e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.452373e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.010004 sec INFO: No Floating Point Exceptions have been reported - 57,621,926 cycles # 1.879 GHz - 80,488,160 instructions # 1.40 insn per cycle - 0.031258526 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2521) (512y: 32) (512z: 6939) + 35,465,401 cycles # 3.143 GHz + 74,977,185 instructions # 2.11 insn per cycle + 0.011421321 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2524) (512y: 8) (512z: 6939) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 16ac12981a..0c1df1c6d0 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:20:13 +DATE: 2024-05-16_15:26:59 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.172533e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.195464e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.199217e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.467059 sec -INFO: No Floating Point Exceptions have been reported - 1,929,783,722 cycles # 2.812 GHz - 2,830,067,082 instructions # 1.47 insn per cycle - 0.744348567 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.817494e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.954472e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.963776e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.485210 sec -INFO: No Floating Point Exceptions have been reported - 1,989,265,248 cycles # 2.816 GHz - 2,972,405,087 instructions # 1.49 insn per cycle - 0.764721680 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405200E-006 -Relative difference = 3.3369094561706885e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.312127e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.315249e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.315249e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.164695 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.480417e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.484604e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.484604e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.120324 sec INFO: No Floating Point Exceptions have been reported - 479,517,658 cycles # 2.854 GHz - 1,405,303,424 instructions # 2.93 insn per cycle - 0.168655160 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3977) (avx2: 0) (512y: 0) (512z: 0) + 422,693,979 cycles # 3.479 GHz + 1,398,982,689 instructions # 3.31 insn per cycle + 0.121787632 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3983) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.589174e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.601629e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.601629e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.085009 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.952459e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.974369e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.974369e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.061230 sec INFO: No Floating Point Exceptions have been reported - 242,672,694 cycles # 2.748 GHz - 691,102,866 instructions # 2.85 insn per cycle - 0.088915527 seconds time elapsed + 211,904,125 cycles # 3.396 GHz + 685,639,297 instructions # 3.24 insn per cycle + 0.062689212 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9324) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.402863e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.409241e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.409241e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.042757 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.020104e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.027829e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027829e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.028377 sec INFO: No Floating Point Exceptions have been reported - 119,836,607 cycles # 2.596 GHz - 257,882,084 instructions # 2.15 insn per cycle - 0.046733316 seconds time elapsed + 97,728,139 cycles # 3.306 GHz + 252,391,837 instructions # 2.58 insn per cycle + 0.029788449 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.611690e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.620124e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.620124e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037856 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.253977e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.263726e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.263726e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.025667 sec INFO: No Floating Point Exceptions have been reported - 108,462,768 cycles # 2.631 GHz - 238,127,423 instructions # 2.20 insn per cycle - 0.041890123 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7342) (512y: 146) (512z: 0) + 88,944,423 cycles # 3.307 GHz + 233,215,123 instructions # 2.62 insn per cycle + 0.027132704 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7393) (512y: 126) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.150674e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155466e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.155466e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.051477 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.241277e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.262092e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.262092e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.018575 sec INFO: No Floating Point Exceptions have been reported - 99,538,839 cycles # 1.810 GHz - 139,339,349 instructions # 1.40 insn per cycle - 0.055665824 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1953) (512y: 122) (512z: 6323) + 62,726,022 cycles # 3.178 GHz + 133,561,563 instructions # 2.13 insn per cycle + 0.019979396 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1946) (512y: 100) (512z: 6321) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 96180e8a09..3c13a3eafa 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,229 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:20:23 +DATE: 2024-05-16_15:27:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.207087e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.230616e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.234507e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.468179 sec -INFO: No Floating Point Exceptions have been reported - 1,938,727,271 cycles # 2.813 GHz - 2,835,562,501 instructions # 1.46 insn per cycle - 0.747262841 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.924846e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.065621e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.075056e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.482793 sec -INFO: No Floating Point Exceptions have been reported - 2,011,507,022 cycles # 2.818 GHz - 2,962,288,052 instructions # 1.47 insn per cycle - 0.770325801 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405200E-006 -Relative difference = 3.3369094561706885e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.325014e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.328184e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.328184e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.163699 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.498320e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.502510e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.502510e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.119291 sec INFO: No Floating Point Exceptions have been reported - 475,740,171 cycles # 2.851 GHz - 1,400,755,519 instructions # 2.94 insn per cycle - 0.167716370 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3871) (avx2: 0) (512y: 0) (512z: 0) + 419,059,455 cycles # 3.480 GHz + 1,392,561,195 instructions # 3.32 insn per cycle + 0.120716568 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3865) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.586616e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.599028e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.599028e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.084604 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.996705e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.012856e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.012856e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.060413 sec INFO: No Floating Point Exceptions have been reported - 242,310,895 cycles # 2.753 GHz - 687,440,781 instructions # 2.84 insn per cycle - 0.088664129 seconds time elapsed + 209,387,891 cycles # 3.399 GHz + 681,182,721 instructions # 3.25 insn per cycle + 0.061901614 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9365) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.421509e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.427219e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.427219e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.041396 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.032467e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.040331e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.040331e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.027731 sec INFO: No Floating Point Exceptions have been reported - 117,633,598 cycles # 2.630 GHz - 253,582,281 instructions # 2.16 insn per cycle - 0.045344869 seconds time elapsed + 95,714,730 cycles # 3.307 GHz + 247,594,769 instructions # 2.59 insn per cycle + 0.029200183 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8196) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.533249e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.540083e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.540083e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.038782 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.254878e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.264628e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.264628e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.025136 sec INFO: No Floating Point Exceptions have been reported - 106,121,372 cycles # 2.518 GHz - 233,883,831 instructions # 2.20 insn per cycle - 0.042791740 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7292) (512y: 146) (512z: 0) + 86,937,138 cycles # 3.307 GHz + 228,402,154 instructions # 2.63 insn per cycle + 0.026539714 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7343) (512y: 126) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.148151e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.152898e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.152898e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.050273 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.275125e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.296190e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.296190e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.017773 sec INFO: No Floating Point Exceptions have been reported - 95,562,086 cycles # 1.781 GHz - 134,760,547 instructions # 1.41 insn per cycle - 0.054201969 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1904) (512y: 122) (512z: 6323) + 60,673,695 cycles # 3.196 GHz + 128,728,504 instructions # 2.12 insn per cycle + 0.019180779 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 100) (512z: 6321) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 15f8e8659d..26ebcd33d3 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:18:21 +DATE: 2024-05-16_15:26:28 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.830621e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.798641e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.407520e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.518896 sec -INFO: No Floating Point Exceptions have been reported - 2,130,015,467 cycles # 2.824 GHz - 3,049,782,764 instructions # 1.43 insn per cycle - 0.811167083 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 
tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961386341946 -Relative difference = 2.034932117056294e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 
4 -EvtsPerSec[Rmb+ME] (23) = ( 9.652167e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.115593e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.115593e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.200987 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.354291e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.552617e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.552617e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.907102 sec INFO: No Floating Point Exceptions have been reported - 3,451,141,340 cycles # 2.863 GHz - 8,714,346,508 instructions # 2.53 insn per cycle - 1.206502072 seconds time elapsed + 3,180,797,976 cycles # 3.496 GHz + 8,725,132,831 instructions # 2.74 insn per cycle + 0.910013968 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 458) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.615216e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.136998e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.136998e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.764589 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.370898e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.106660e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.106660e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.572261 sec INFO: No Floating Point Exceptions have been reported - 2,197,801,743 cycles # 2.856 GHz - 5,465,338,789 instructions # 2.49 insn per cycle - 0.770190206 seconds time elapsed + 2,009,046,062 cycles # 3.495 GHz + 5,459,978,422 instructions # 2.72 insn per cycle + 0.575162423 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1298) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.276018e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.408168e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.408168e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.576218 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.422177e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.188609e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.188609e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.437673 sec INFO: No Floating Point Exceptions have been reported - 1,593,709,911 cycles # 2.743 GHz - 3,182,241,147 instructions # 2.00 insn per cycle - 0.581747530 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1459) (512y: 0) (512z: 0) + 1,498,454,827 cycles # 3.403 GHz + 3,159,780,709 instructions # 2.11 insn per cycle + 0.440583091 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1457) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.349428e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.560869e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.560869e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.561533 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.570084e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.469999e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.469999e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.422625 sec INFO: No Floating Point Exceptions have been reported - 1,552,006,209 cycles # 2.741 GHz - 3,083,871,547 instructions # 1.99 insn per cycle - 0.567100846 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1274) (512y: 95) (512z: 0) + 1,446,725,428 cycles # 3.402 GHz + 3,027,985,230 instructions # 2.09 insn per cycle + 0.425578887 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1272) (512y: 95) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.103380e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.012957e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.012957e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.614313 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.357154e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.401947e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.401947e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.368169 sec INFO: No Floating Point Exceptions have been reported - 1,344,567,311 cycles # 2.171 GHz - 2,376,857,450 instructions # 1.77 insn per cycle - 0.619905839 seconds time elapsed + 1,265,935,948 cycles # 3.414 GHz + 2,320,403,035 instructions # 1.83 insn per cycle + 0.371132171 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 584) (512y: 62) (512z: 953) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index 6add239f16..1975a5970f 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:18:33 +DATE: 2024-05-16_15:26:32 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.948407e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.328423e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.761410e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.519601 sec -INFO: No Floating Point Exceptions have been reported - 2,123,926,879 cycles # 2.815 GHz - 2,991,717,095 instructions # 1.41 insn per cycle - 0.811782941 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 
tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961386341946 -Relative difference = 2.034932117056294e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 
4 -EvtsPerSec[Rmb+ME] (23) = ( 9.686449e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.122021e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.122021e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.196252 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.361427e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.560462e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.560462e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.901713 sec INFO: No Floating Point Exceptions have been reported - 3,435,810,217 cycles # 2.862 GHz - 8,629,255,980 instructions # 2.51 insn per cycle - 1.201785163 seconds time elapsed + 3,160,289,773 cycles # 3.495 GHz + 8,639,884,903 instructions # 2.73 insn per cycle + 0.904592180 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 403) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.590372e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.090308e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.090308e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.773787 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.368303e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.105355e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.105355e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.572637 sec INFO: No Floating Point Exceptions have been reported - 2,172,281,754 cycles # 2.790 GHz - 5,399,686,889 instructions # 2.49 insn per cycle - 0.779398624 seconds time elapsed + 2,010,981,764 cycles # 3.496 GHz + 5,393,612,503 instructions # 2.68 insn per cycle + 0.575509948 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1258) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.283822e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.420214e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.420214e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.573489 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.443558e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.171366e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.171366e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.435258 sec INFO: No Floating Point Exceptions have been reported - 1,585,769,603 cycles # 2.741 GHz - 3,149,146,191 instructions # 1.99 insn per cycle - 0.579182812 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1386) (512y: 0) (512z: 0) + 1,490,573,485 cycles # 3.404 GHz + 3,126,999,219 instructions # 2.10 insn per cycle + 0.438133228 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1384) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.354137e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.604902e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.604902e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.559958 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.583125e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.473819e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.473819e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.418929 sec INFO: No Floating Point Exceptions have been reported - 1,547,131,577 cycles # 2.739 GHz - 3,062,437,995 instructions # 1.98 insn per cycle - 0.565482274 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1220) (512y: 95) (512z: 0) + 1,435,129,940 cycles # 3.405 GHz + 3,006,753,806 instructions # 2.10 insn per cycle + 0.421776338 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1218) (512y: 95) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.108481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.023241e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.023241e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.612188 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.334038e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.389007e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.389007e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.368990 sec INFO: No Floating Point Exceptions have been reported - 1,354,565,413 cycles # 2.195 GHz - 2,362,076,089 instructions # 1.74 insn per cycle - 0.617754113 seconds time elapsed + 1,268,599,838 cycles # 3.414 GHz + 2,306,107,917 instructions # 1.82 insn per cycle + 0.371913609 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 557) (512y: 62) (512z: 944) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 35b822f8f6..a4e0799fe6 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:18:45 +DATE: 2024-05-16_15:26:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.370205e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.202282e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.219119e+09 ) sec^-1 -MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 -TOTAL : 0.481970 sec -INFO: No Floating Point Exceptions have been reported - 1,992,725,828 cycles # 2.818 GHz - 2,868,294,521 instructions # 1.44 insn per cycle - 0.764321619 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 72 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 
tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.232893e-01 -Avg ME (F77/GPU) = 0.42328959883889183 -Relative difference = 7.059920764700599e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 9.685625e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.126627e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.126627e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.173857 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.418692e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.668051e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.668051e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 +TOTAL : 0.857057 sec INFO: No Floating Point Exceptions have been reported - 3,371,653,633 cycles # 2.862 GHz - 8,663,374,999 instructions # 2.57 insn per cycle - 1.179087797 seconds time elapsed + 3,000,819,032 cycles # 3.492 GHz + 8,687,236,928 instructions # 2.89 insn per cycle + 0.859729395 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 464) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.242831e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.476100e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.476100e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.559869 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.450523e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.473548e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.473548e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 +TOTAL : 0.420012 sec INFO: No Floating Point Exceptions have been reported - 1,544,628,517 cycles # 2.742 GHz - 3,687,558,281 instructions # 2.39 insn per cycle - 0.565253973 seconds time elapsed + 1,475,989,433 cycles # 3.494 GHz + 3,695,071,583 instructions # 2.50 insn per cycle + 0.422713076 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328960439772345 Relative difference = 1.0389396439618597e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.072720e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.536969e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.536969e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.431765 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.518335e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.025697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.025697e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 +TOTAL : 0.343695 sec INFO: No Floating Point Exceptions have been reported - 1,203,780,059 cycles # 2.758 GHz - 2,425,738,448 instructions # 2.02 insn per cycle - 0.436956710 seconds time elapsed + 1,181,236,861 cycles # 3.413 GHz + 2,407,227,485 instructions # 2.04 insn per cycle + 0.346416388 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1835) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.171115e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.846212e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.846212e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.420853 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.663295e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.394993e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.394993e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 +TOTAL : 0.338394 sec INFO: No Floating Point Exceptions have been reported - 1,176,016,394 cycles # 2.764 GHz - 2,371,904,468 instructions # 2.02 insn per cycle - 0.426173333 seconds time elapsed + 1,163,803,486 cycles # 3.415 GHz + 2,320,293,736 instructions # 1.99 insn per cycle + 0.341067500 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1716) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.877260e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.908000e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.908000e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.456855 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.367890e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.143642e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.143642e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 +TOTAL : 0.309164 sec INFO: No Floating Point Exceptions have been reported - 1,057,659,631 cycles # 2.291 GHz - 2,045,594,279 instructions # 1.93 insn per cycle - 0.462305299 seconds time elapsed + 1,065,579,084 cycles # 3.421 GHz + 1,993,744,116 instructions # 1.87 insn per cycle + 0.311782023 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1125) (512y: 5) (512z: 1216) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328957567224279 Relative difference = 5.7473080363015266e-08 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 7aff49b16c..fa58368d8f 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:18:56 +DATE: 2024-05-16_15:26:40 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.371360e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.210950e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.256375e+09 ) sec^-1 -MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 -TOTAL : 0.480672 sec -INFO: No Floating Point Exceptions have been reported - 1,992,055,315 cycles # 2.814 GHz - 2,833,598,547 instructions # 1.42 insn per cycle - 0.764848194 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 71 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 
tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.232893e-01 -Avg ME (F77/GPU) = 0.42328960436861962 -Relative difference = 7.190557844040413e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 9.763702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.137508e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.137508e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.163446 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.460439e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.724757e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.724757e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 +TOTAL : 0.835443 sec INFO: No Floating Point Exceptions have been reported - 3,338,476,373 cycles # 2.858 GHz - 8,537,550,948 instructions # 2.56 insn per cycle - 1.168736395 seconds time elapsed + 2,930,080,668 cycles # 3.497 GHz + 8,561,435,617 instructions # 2.92 insn per cycle + 0.838100501 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 372) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.260122e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.497908e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497908e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.555036 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.429713e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.414628e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.414628e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 +TOTAL : 0.421363 sec INFO: No Floating Point Exceptions have been reported - 1,536,047,057 cycles # 2.745 GHz - 3,655,155,421 instructions # 2.38 insn per cycle - 0.560267212 seconds time elapsed + 1,477,358,785 cycles # 3.487 GHz + 3,663,157,543 instructions # 2.48 insn per cycle + 0.423998620 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1417) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328960439772345 Relative difference = 1.0389396439618597e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.063874e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.501699e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.501699e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.432903 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.539131e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.024156e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.024156e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 +TOTAL : 0.342149 sec INFO: No Floating Point Exceptions have been reported - 1,210,141,290 cycles # 2.765 GHz - 2,409,755,736 instructions # 1.99 insn per cycle - 0.438252635 seconds time elapsed + 1,175,594,540 cycles # 3.412 GHz + 2,391,743,655 instructions # 2.03 insn per cycle + 0.344799784 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1739) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.166764e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.861571e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.861571e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.420903 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.668725e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.423495e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.423495e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 +TOTAL : 0.337063 sec INFO: No Floating Point Exceptions have been reported - 1,178,969,939 cycles # 2.770 GHz - 2,360,225,770 instructions # 2.00 insn per cycle - 0.426183474 seconds time elapsed + 1,159,169,528 cycles # 3.415 GHz + 2,308,303,558 instructions # 1.99 insn per cycle + 0.339703561 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1639) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.911284e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.009343e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.009343e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.450059 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.455050e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.167889e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.167889e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 +TOTAL : 0.304678 sec INFO: No Floating Point Exceptions have been reported - 1,050,992,336 cycles # 2.312 GHz - 2,030,439,704 instructions # 1.93 insn per cycle - 0.455402836 seconds time elapsed + 1,051,727,913 cycles # 3.426 GHz + 1,978,782,960 instructions # 1.88 insn per cycle + 0.307278068 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1038) (512y: 5) (512z: 1206) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328957567224279 Relative difference = 5.7473080363015266e-08 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index abe970d6c3..d3bac7a6e7 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:19:08 +DATE: 2024-05-16_15:26:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.820532e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.774843e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.362520e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.522135 sec -INFO: No Floating Point Exceptions have been reported - 2,125,526,304 cycles # 2.816 GHz - 3,031,609,259 instructions # 1.43 insn per cycle - 0.813775431 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 
tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961420809225 -Relative difference = 2.02678940084305e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 9.477506e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.093135e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.093135e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.221347 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.327098e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.519271e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.519271e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.921747 sec INFO: No Floating Point Exceptions have been reported - 3,505,104,547 cycles # 2.859 GHz - 8,781,502,817 instructions # 2.51 insn per cycle - 1.226777715 seconds time elapsed + 3,232,183,518 cycles # 3.497 GHz + 8,792,573,925 instructions # 2.72 insn per cycle + 0.924658528 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.650256e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.201424e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.201424e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.750812 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.394321e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.150474e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.150474e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.569327 sec INFO: No Floating Point Exceptions have been reported - 2,158,593,065 cycles # 2.858 GHz - 5,461,970,761 instructions # 2.53 insn per cycle - 0.756427517 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1315) (avx2: 0) (512y: 0) (512z: 0) + 1,997,302,428 cycles # 3.492 GHz + 5,456,554,007 instructions # 2.73 insn per cycle + 0.572266310 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1316) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.173052e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.222124e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.222124e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.600946 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.559612e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.435531e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.435531e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.426046 sec INFO: No Floating Point Exceptions have been reported - 1,584,857,703 cycles # 2.630 GHz - 3,130,453,718 instructions # 1.98 insn per cycle - 0.606559761 seconds time elapsed + 1,459,913,674 cycles # 3.405 GHz + 3,092,427,903 instructions # 2.12 insn per cycle + 0.429012673 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1508) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.444228e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.788523e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.788523e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.544040 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.740143e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.828447e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.828447e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.407253 sec INFO: No Floating Point Exceptions have been reported - 1,507,653,377 cycles # 2.746 GHz - 2,979,978,086 instructions # 1.98 insn per cycle - 0.549733637 seconds time elapsed + 1,396,236,023 cycles # 3.407 GHz + 2,910,886,914 instructions # 2.08 insn per cycle + 0.410124731 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1266) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.159766e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.131056e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.131056e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.601738 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.557464e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.979745e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.979745e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.356952 sec INFO: No Floating Point Exceptions have been reported - 1,324,343,740 cycles # 2.183 GHz - 2,317,585,809 instructions # 1.75 insn per cycle - 0.607328338 seconds time elapsed + 1,228,668,021 cycles # 3.417 GHz + 2,251,383,273 instructions # 1.83 insn per cycle + 0.359835554 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 708) (512y: 64) (512z: 1000) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index 91c7a883f0..f622a5fe7b 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:19:20 +DATE: 2024-05-16_15:26:47 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.922874e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.310136e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.745093e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.518290 sec -INFO: No Floating Point Exceptions have been reported - 2,124,893,311 cycles # 2.820 GHz - 3,045,592,907 instructions # 1.43 insn per cycle - 0.810370808 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 
tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961420809225 -Relative difference = 2.02678940084305e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 9.542081e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.100861e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.100861e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.212162 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.324875e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.511952e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.511952e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.922159 sec INFO: No Floating Point Exceptions have been reported - 3,479,876,909 cycles # 2.860 GHz - 8,693,142,752 instructions # 2.50 insn per cycle - 1.217788949 seconds time elapsed + 3,233,137,052 cycles # 3.496 GHz + 8,703,016,285 instructions # 2.69 insn per cycle + 0.925038017 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.583309e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.076893e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.076893e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.776846 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.384239e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.131034e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.131034e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.569347 sec INFO: No Floating Point Exceptions have been reported - 2,167,338,088 cycles # 2.773 GHz - 5,396,551,029 instructions # 2.49 insn per cycle - 0.782321373 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1286) (avx2: 0) (512y: 0) (512z: 0) + 1,994,158,544 cycles # 3.487 GHz + 5,378,748,355 instructions # 2.70 insn per cycle + 0.572208576 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1287) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.326845e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.550286e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.550286e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.565802 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.417383e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.522816e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.522816e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.437843 sec INFO: No Floating Point Exceptions have been reported - 1,565,712,129 cycles # 2.743 GHz - 3,096,211,416 instructions # 1.98 insn per cycle - 0.571442008 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1403) (512y: 0) (512z: 0) + 1,500,871,039 cycles # 3.408 GHz + 3,055,075,884 instructions # 2.04 insn per cycle + 0.440705424 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1408) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.453432e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.812851e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.812851e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.541762 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.730924e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.816090e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.816090e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.407927 sec INFO: No Floating Point Exceptions have been reported - 1,501,240,710 cycles # 2.746 GHz - 2,962,583,104 instructions # 1.97 insn per cycle - 0.547343450 seconds time elapsed + 1,397,568,169 cycles # 3.408 GHz + 2,885,067,782 instructions # 2.06 insn per cycle + 0.410801458 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1207) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.179755e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.168512e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.168512e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.595795 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.578847e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.063083e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.063083e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.355641 sec INFO: No Floating Point Exceptions have been reported - 1,328,066,698 cycles # 2.210 GHz - 2,301,968,914 instructions # 1.73 insn per cycle - 0.601517736 seconds time elapsed + 1,223,923,877 cycles # 3.417 GHz + 2,232,489,048 instructions # 1.82 insn per cycle + 0.358486033 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 669) (512y: 64) (512z: 987) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 685cbca5b9..6ee29efed3 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:16:05 +DATE: 2024-05-16_15:25:21 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.742150e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168430e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.277843e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.532609 sec -INFO: No Floating Point Exceptions have been reported - 2,187,320,510 cycles # 2.847 GHz - 3,138,661,758 instructions # 1.43 insn per cycle - 0.825533767 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795799595186 -Relative difference = 1.2987943449389332e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.052254e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.112326e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.112326e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.217611 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.787390e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.861874e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.861874e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 3.890119 sec INFO: No Floating Point Exceptions have been reported - 15,171,088,318 cycles # 2.905 GHz - 38,379,828,637 instructions # 2.53 insn per cycle - 5.223033411 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) + 13,618,207,328 cycles # 3.499 GHz + 38,387,959,458 instructions # 2.82 insn per cycle + 3.893052041 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 679) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593964 Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.483453e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.675957e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.675957e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 3.119586 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.831062e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.077740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.077740e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 2.297638 sec INFO: No Floating Point Exceptions have been reported - 9,050,575,942 cycles # 2.897 GHz - 24,585,418,505 instructions # 2.72 insn per cycle - 3.125051862 seconds time elapsed + 8,042,217,440 cycles # 3.496 GHz + 24,577,604,144 instructions # 3.06 insn per cycle + 2.300592580 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593955 Relative difference = 1.2987947253027805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.531605e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.007383e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.007383e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 2.004395 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.841839e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.487845e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.487845e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.464950 sec INFO: No Floating Point Exceptions have been reported - 5,470,487,475 cycles # 2.723 GHz - 11,258,117,341 instructions # 2.06 insn per cycle - 2.009874159 seconds time elapsed + 4,886,201,453 cycles # 3.329 GHz + 11,233,439,311 instructions # 2.30 insn per cycle + 1.467827376 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2379) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.034312e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.611178e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.611178e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 1.846817 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.176872e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.875940e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.875940e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.406146 sec INFO: No Floating Point Exceptions have been reported - 4,937,000,755 cycles # 2.666 GHz - 10,562,656,233 instructions # 2.14 insn per cycle - 1.852346867 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) + 4,692,310,341 cycles # 3.331 GHz + 10,637,543,631 instructions # 2.27 insn per cycle + 1.409098877 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2128) (512y: 124) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.686069e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.892849e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.892849e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 2.955560 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.318068e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.029145e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.029145e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.384314 sec INFO: No Floating Point Exceptions have been reported - 5,363,967,162 cycles # 1.812 GHz - 7,798,816,647 instructions # 1.45 insn per cycle - 2.961128813 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1545) + 4,618,770,611 cycles # 3.330 GHz + 7,677,034,251 instructions # 1.66 insn per cycle + 1.387244129 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1543) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index e33bd01ef0..10b04f1f61 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:16:29 +DATE: 2024-05-16_15:25:32 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.734270e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.167895e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.277771e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.531030 sec -INFO: No Floating Point Exceptions have been reported - 2,147,766,041 cycles # 2.808 GHz - 3,081,960,346 instructions # 1.43 insn per cycle - 0.823573588 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795799595186 -Relative difference = 1.2987943449389332e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.072347e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.133952e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.133952e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.167480 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.740388e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.812570e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.812570e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 3.954536 sec INFO: No Floating Point Exceptions have been reported - 15,011,121,904 cycles # 2.902 GHz - 40,101,107,795 instructions # 2.67 insn per cycle - 5.172969591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 13,826,590,739 cycles # 3.497 GHz + 40,193,490,698 instructions # 2.91 insn per cycle + 3.957648216 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593964 Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.643871e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.853935e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.853935e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 2.986462 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.106154e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.381557e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.381557e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 2.180640 sec INFO: No Floating Point Exceptions have been reported - 8,687,902,361 cycles # 2.905 GHz - 23,671,582,038 instructions # 2.72 insn per cycle - 2.991891761 seconds time elapsed + 7,631,771,620 cycles # 3.496 GHz + 23,662,957,585 instructions # 3.10 insn per cycle + 2.183596089 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2071) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593955 Relative difference = 1.2987947253027805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.688647e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.031946e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.031946e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 2.347118 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.885731e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.381574e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.381574e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.649994 sec INFO: No Floating Point Exceptions have been reported - 6,408,205,490 cycles # 2.726 GHz - 13,061,009,362 instructions # 2.04 insn per cycle - 2.352705794 seconds time elapsed + 5,495,835,199 cycles # 3.326 GHz + 13,036,765,053 instructions # 2.37 insn per cycle + 1.652905979 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2545) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.217515e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.639971e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.639971e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 2.116902 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.276846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.824926e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.824926e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.564074 sec INFO: No Floating Point Exceptions have been reported - 5,786,103,959 cycles # 2.728 GHz - 12,322,398,791 instructions # 2.13 insn per cycle - 2.122365893 seconds time elapsed + 5,212,061,608 cycles # 3.327 GHz + 12,262,487,055 instructions # 2.35 insn per cycle + 1.566961165 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2092) (512y: 294) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.391355e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.565589e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.565589e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 3.201193 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.945464e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.602857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.602857e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.442889 sec INFO: No Floating Point Exceptions have been reported - 5,819,258,849 cycles # 1.816 GHz - 9,603,315,511 instructions # 1.65 insn per cycle - 3.206783116 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1970) + 4,806,634,394 cycles # 3.325 GHz + 9,536,077,650 instructions # 1.98 insn per cycle + 1.445783339 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1520) (512y: 199) (512z: 1969) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index fa2404eda0..afa4d341c3 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:16:53 +DATE: 2024-05-16_15:25:45 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.806467e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.679043e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.988694e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294909e+00 +- 3.228140e-03 ) GeV^0 -TOTAL : 0.484472 sec -INFO: No Floating Point Exceptions have been reported - 2,024,107,607 cycles # 2.847 GHz - 2,925,717,340 instructions # 1.45 insn per cycle - 0.767822860 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 3.234085e+00 -Avg ME (F77/GPU) = 3.2341253389604390 -Relative difference = 1.2473067479392238e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.190102e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.263149e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.263149e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 -TOTAL : 4.875075 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.067256e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.169032e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.169032e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 +TOTAL : 3.535020 sec INFO: No Floating Point Exceptions have been reported - 14,157,231,167 cycles # 2.902 GHz - 38,349,372,496 instructions # 2.71 insn per cycle - 4.880360280 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 596) (avx2: 0) (512y: 0) (512z: 0) + 12,369,254,741 cycles # 3.496 GHz + 38,267,142,446 instructions # 3.09 insn per cycle + 3.537767898 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 598) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 Avg ME (F77/C++) = 3.2340941932052374 Relative difference = 5.974014286114415e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.893708e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.295163e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.295163e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 
-TOTAL : 2.231375 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.570040e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.068066e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.068066e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 +TOTAL : 1.711228 sec INFO: No Floating Point Exceptions have been reported - 6,474,839,888 cycles # 2.896 GHz - 15,821,273,128 instructions # 2.44 insn per cycle - 2.236825857 seconds time elapsed + 5,990,330,565 cycles # 3.497 GHz + 15,827,583,020 instructions # 2.64 insn per cycle + 1.713959969 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2693) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234093e+00 Avg ME (F77/C++) = 3.2340934062376618 Relative difference = 1.2561100182708985e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.952001e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.027533e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.027533e+06 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 
-TOTAL : 1.258720 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.250375e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.419899e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.419899e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) GeV^0 +TOTAL : 0.948179 sec INFO: No Floating Point Exceptions have been reported - 3,454,982,692 cycles # 2.735 GHz - 7,599,041,128 instructions # 2.20 insn per cycle - 1.263980564 seconds time elapsed + 3,175,156,549 cycles # 3.340 GHz + 7,579,990,052 instructions # 2.39 insn per cycle + 0.950902565 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3054) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919882990420 Relative difference = 3.6180040581126224e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.592851e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.112843e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.112843e+06 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 
-TOTAL : 1.180051 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.284548e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.465053e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.465053e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) GeV^0 +TOTAL : 0.927820 sec INFO: No Floating Point Exceptions have been reported - 3,244,154,820 cycles # 2.739 GHz - 7,208,080,032 instructions # 2.22 insn per cycle - 1.185371954 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2854) (512y: 23) (512z: 0) + 3,106,651,076 cycles # 3.339 GHz + 7,229,113,930 instructions # 2.33 insn per cycle + 0.930548590 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2909) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919882990420 Relative difference = 3.6180040581126224e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.861599e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.601056e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.601056e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 
-TOTAL : 1.616099 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.443944e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.682164e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.682164e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) GeV^0 +TOTAL : 0.837741 sec INFO: No Floating Point Exceptions have been reported - 3,061,871,050 cycles # 1.890 GHz - 5,840,738,200 instructions # 1.91 insn per cycle - 1.621459577 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2375) (512y: 24) (512z: 1889) + 2,808,864,011 cycles # 3.343 GHz + 5,765,952,499 instructions # 2.05 insn per cycle + 0.840456519 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2378) (512y: 0) (512z: 1889) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340921289287508 Relative difference = 3.986551736519174e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 17580b0829..04e4af6a7b 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:17:12 +DATE: 2024-05-16_15:25:54 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.907160e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.728602e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.048441e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294909e+00 +- 3.228140e-03 ) GeV^0 -TOTAL : 0.485743 sec -INFO: No Floating Point Exceptions have been reported - 2,023,423,533 cycles # 2.849 GHz - 2,905,255,031 instructions # 1.44 insn per cycle - 0.768600730 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 3.234085e+00 -Avg ME (F77/GPU) = 3.2341253389604390 -Relative difference = 1.2473067479392238e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.168782e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.238544e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.238544e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 -TOTAL : 4.921731 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 3.115923e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.224041e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.224041e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 +TOTAL : 3.480773 sec INFO: No Floating Point Exceptions have been reported - 14,314,886,956 cycles # 2.906 GHz - 39,834,092,366 instructions # 2.78 insn per cycle - 4.927032591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) + 12,185,361,621 cycles # 3.498 GHz + 39,804,148,929 instructions # 3.27 insn per cycle + 3.483488402 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 577) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 Avg ME (F77/C++) = 3.2340941675938666 Relative difference = 5.182096339328524e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.713515e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.269520e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.269520e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 
-TOTAL : 1.922771 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.103568e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.876705e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.876705e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 +TOTAL : 1.407812 sec INFO: No Floating Point Exceptions have been reported - 5,581,497,918 cycles # 2.896 GHz - 15,286,085,618 instructions # 2.74 insn per cycle - 1.928038449 seconds time elapsed + 4,928,928,439 cycles # 3.495 GHz + 15,291,551,221 instructions # 3.10 insn per cycle + 1.410572590 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234093e+00 Avg ME (F77/C++) = 3.2340934062376618 Relative difference = 1.2561100182708985e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.348339e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.987488e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.987488e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 
-TOTAL : 1.738529 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.083110e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.950197e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.950197e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) GeV^0 +TOTAL : 1.264015 sec INFO: No Floating Point Exceptions have been reported - 4,748,584,350 cycles # 2.724 GHz - 9,734,762,909 instructions # 2.05 insn per cycle - 1.743720825 seconds time elapsed + 4,217,167,690 cycles # 3.330 GHz + 9,714,976,139 instructions # 2.30 insn per cycle + 1.266795977 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3707) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919817797840 Relative difference = 5.633796441974414e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.524514e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.201131e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.201131e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 
-TOTAL : 1.693263 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 9.192270e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.008345e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.008345e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) GeV^0 +TOTAL : 1.252318 sec INFO: No Floating Point Exceptions have been reported - 4,630,030,488 cycles # 2.727 GHz - 9,326,323,775 instructions # 2.01 insn per cycle - 1.698452247 seconds time elapsed + 4,177,402,910 cycles # 3.329 GHz + 9,272,184,711 instructions # 2.22 insn per cycle + 1.255030337 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3495) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919817797840 Relative difference = 5.633796441974414e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.566237e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.043529e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.043529e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 
-TOTAL : 1.970968 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 1.176003e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.330692e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.330692e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) GeV^0 +TOTAL : 1.003208 sec INFO: No Floating Point Exceptions have been reported - 3,659,262,236 cycles # 1.853 GHz - 7,035,706,161 instructions # 1.92 insn per cycle - 1.976219857 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2608) (512y: 12) (512z: 2220) + 3,352,170,777 cycles # 3.333 GHz + 6,969,311,498 instructions # 2.08 insn per cycle + 1.005942374 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2609) (512y: 0) (512z: 2220) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340921270661056 Relative difference = 3.928957668408837e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index b504154b8b..690851a1c4 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:17:33 +DATE: 2024-05-16_15:26:04 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.734753e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166290e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.275672e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.527580 sec -INFO: No Floating Point Exceptions have been reported - 2,184,025,819 cycles # 2.852 GHz - 3,120,664,968 instructions # 1.43 insn per cycle - 0.822365132 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795839181666 -Relative difference = 1.2865539301192385e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.032702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.091464e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.091464e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.267767 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.763034e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.836655e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.836655e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 3.924694 sec INFO: No Floating Point Exceptions have been reported - 15,275,610,730 cycles # 2.898 GHz - 38,585,204,587 instructions # 2.53 insn per cycle - 5.273127531 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 677) (avx2: 0) (512y: 0) (512z: 0) + 13,738,931,281 cycles # 3.498 GHz + 38,522,259,648 instructions # 2.80 insn per cycle + 3.927637983 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.478780e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.672331e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.672331e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 3.124457 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 4.902677e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.156176e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.156176e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 2.266078 sec INFO: No Floating Point Exceptions have been reported - 8,951,368,692 cycles # 2.862 GHz - 24,230,346,765 instructions # 2.71 insn per cycle - 3.129932357 seconds time elapsed + 7,934,170,917 cycles # 3.497 GHz + 24,223,400,789 instructions # 3.05 insn per cycle + 2.268949403 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.646169e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.144963e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.144963e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 1.966588 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.000583e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.673091e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.673091e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.438286 sec INFO: No Floating Point Exceptions have been reported - 5,394,193,630 cycles # 2.737 GHz - 11,282,079,100 instructions # 2.09 insn per cycle - 1.972075346 seconds time elapsed + 4,798,481,088 cycles # 3.330 GHz + 11,208,114,331 instructions # 2.34 insn per cycle + 1.441176727 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2483) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.312770e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.933844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.933844e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 1.769300 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.267759e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.985920e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.985920e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.392423 sec INFO: No Floating Point Exceptions have been reported - 4,855,634,573 cycles # 2.737 GHz - 10,529,908,188 instructions # 2.17 insn per cycle - 1.774939787 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2170) (512y: 148) (512z: 0) + 4,644,990,529 cycles # 3.330 GHz + 10,555,084,125 instructions # 2.27 insn per cycle + 1.395314648 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2221) (512y: 128) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.779051e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.993953e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.993953e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 2.883837 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.584630e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.339766e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.339766e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.345070 sec INFO: No Floating Point Exceptions have been reported - 5,232,692,174 cycles # 1.812 GHz - 7,609,089,901 instructions # 1.45 insn per cycle - 2.889504238 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1611) + 4,488,723,511 cycles # 3.331 GHz + 7,446,352,831 instructions # 1.66 insn per cycle + 1.347938337 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1626) (512y: 104) (512z: 1609) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index 62b069d661..5bafeaaf14 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,214 +1,183 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:17:56 +DATE: 2024-05-16_15:26:16 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.743856e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168884e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279553e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.531580 sec -INFO: No Floating Point Exceptions have been reported - 2,155,818,187 cycles # 2.818 GHz - 3,085,690,683 instructions # 1.43 insn per cycle - 0.823819066 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795839181666 -Relative difference = 1.2865539301192385e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.002464e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.060011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.060011e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.344548 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 2.685093e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.754855e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.754855e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 4.033579 sec INFO: No Floating Point Exceptions have been reported - 15,331,700,326 cycles # 2.866 GHz - 40,369,778,421 instructions # 2.63 insn per cycle - 5.350011304 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 14,113,224,310 cycles # 3.497 GHz + 40,344,618,972 instructions # 2.86 insn per cycle + 4.036535060 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 682) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.555017e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.755921e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.755921e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 3.059082 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 5.092059e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.369527e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.369527e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 2.185707 sec INFO: No Floating Point Exceptions have been reported - 8,522,277,742 cycles # 2.782 GHz - 23,253,428,254 instructions # 2.73 insn per cycle - 3.064709896 seconds time elapsed + 7,645,095,536 cycles # 3.493 GHz + 23,246,336,631 instructions # 3.04 insn per cycle + 2.189160403 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2090) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.699594e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.044812e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.044812e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 2.340476 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 6.835334e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.320136e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.320136e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.661105 sec INFO: No Floating Point Exceptions have been reported - 6,239,696,903 cycles # 2.661 GHz - 12,963,096,678 instructions # 2.08 insn per cycle - 2.346005075 seconds time elapsed + 5,533,662,877 cycles # 3.326 GHz + 12,888,058,955 instructions # 2.33 insn per cycle + 1.664031290 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2668) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.032659e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.430530e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.430530e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 2.191599 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 7.211823e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.749624e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.749624e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.577159 sec INFO: No Floating Point Exceptions have been reported - 5,901,015,524 cycles # 2.687 GHz - 12,238,387,260 instructions # 2.07 insn per cycle - 2.197121947 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2208) (512y: 296) (512z: 0) + 5,254,421,927 cycles # 3.327 GHz + 12,086,386,777 instructions # 2.30 insn per cycle + 1.580021491 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.554826e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.745267e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.745267e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 3.058656 sec +OMP threads / `nproc --all` = 1 / 32 +EvtsPerSec[Rmb+ME] (23) = ( 8.190386e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.891944e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.891944e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.403785 sec INFO: No Floating Point Exceptions have been reported - 5,596,491,041 cycles # 1.827 GHz - 8,743,545,379 instructions # 1.56 insn per cycle - 3.064278596 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1908) + 4,676,949,985 cycles # 3.325 GHz + 8,673,917,546 instructions # 1.85 insn per cycle + 1.406719317 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1501) (512y: 173) (512z: 1907) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 From 7bfe75c9a2a4147ee50c2f3fe8f2072236c2fdef Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 16 May 2024 19:28:08 +0200 Subject: [PATCH 41/48] [jtmk2] rerun 30 tmad tests on itgold91, all as expected STARTED AT Thu May 16 03:28:16 PM CEST 2024 (SM tests) ENDED(1) AT Thu May 16 06:54:44 PM CEST 2024 [Status=0] (BSM tests) ENDED(1) AT Thu May 16 06:59:48 PM CEST 2024 [Status=0] 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 20 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt 1 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt 0 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt 0 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt --- .../log_eemumu_mad_d_inl0_hrd0.txt | 309 ++++++----------- .../log_eemumu_mad_f_inl0_hrd0.txt | 313 ++++++----------- .../log_eemumu_mad_m_inl0_hrd0.txt | 309 ++++++----------- .../log_ggtt_mad_d_inl0_hrd0.txt | 311 ++++++----------- .../log_ggtt_mad_f_inl0_hrd0.txt | 309 ++++++----------- .../log_ggtt_mad_m_inl0_hrd0.txt | 309 ++++++----------- .../log_ggttg_mad_d_inl0_hrd0.txt | 313 ++++++----------- .../log_ggttg_mad_f_inl0_hrd0.txt | 309 ++++++----------- .../log_ggttg_mad_m_inl0_hrd0.txt | 311 ++++++----------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 309 ++++++----------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 307 ++++++----------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 309 ++++++----------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 309 ++++++----------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 317 ++++++------------ .../log_ggttggg_mad_m_inl0_hrd0.txt | 309 ++++++----------- .../log_gqttq_mad_d_inl0_hrd0.txt | 307 ++++++----------- .../log_gqttq_mad_f_inl0_hrd0.txt | 313 ++++++----------- .../log_gqttq_mad_m_inl0_hrd0.txt | 311 ++++++----------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 309 ++++++----------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 71 ++-- .../log_heftggbb_mad_m_inl0_hrd0.txt | 309 ++++++----------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 311 ++++++----------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 309 ++++++----------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 309 ++++++----------- 
.../log_susyggt1t1_mad_d_inl0_hrd0.txt | 63 ++-- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 65 ++-- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 63 ++-- .../log_susyggtt_mad_d_inl0_hrd0.txt | 69 ++-- .../log_susyggtt_mad_f_inl0_hrd0.txt | 69 ++-- .../log_susyggtt_mad_m_inl0_hrd0.txt | 69 ++-- 30 files changed, 2529 insertions(+), 5071 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 41d66d8253..8cf68c4c9b 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_01:25:56 +DATE: 2024-05-16_15:28:51 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7231s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7147s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4710s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4643s + [COUNTERS] Fortran MEs ( 1 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ 
CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1771s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1222s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1157s + [COUNTERS] Fortran MEs ( 1 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3024s - [COUNTERS] Fortran MEs ( 1 ) : 0.0925s for 90112 events => throughput is 9.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2636s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1958s + [COUNTERS] Fortran MEs ( 1 ) : 0.0678s for 90112 events => throughput is 1.33E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1913s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1844s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 8192 events => throughput is 1.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1255s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1201s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3835s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0754s for 90112 events => throughput is 1.20E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2595s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2015s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0579s for 90112 events => throughput is 1.56E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.192081e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.541864e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.204613e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.584257e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1864s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1822s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1233s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1200s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.51E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3602s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3125s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0476s for 90112 events => throughput is 1.89E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2334s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1993s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0341s for 90112 events => throughput is 2.64E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.953763e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.692869e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.009226e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.814428e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1833s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1187s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.39E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3431s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3065s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0366s for 90112 events => throughput is 2.46E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2261s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1996s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0265s for 90112 events => throughput is 3.40E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.541984e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.533549e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.616899e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.712671e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1841s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1810s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1214s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1191s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.53E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3388s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3042s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0346s for 90112 events => throughput is 2.61E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2242s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1988s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 90112 events => throughput is 3.55E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.662866e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.627236e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.883371e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.839363e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.99E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1199s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1181s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.43E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3474s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3044s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0430s for 90112 events => throughput is 2.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2212s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2016s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 90112 events => throughput is 4.60E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.029340e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.651847e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.231218e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6140s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6135s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715404661532E-002) differ by less than 3E-14 (0.0) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7363s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7314s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.86E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.277665e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.916168e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.959957e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.493136e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.970202e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.040191e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.002261e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.051717e+06 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.140061e+08 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index c4c8099bbf..6cae3f4f6d 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - -make USEBUILDDIR=1 BACKEND=cuda - - +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_01:26:13 +DATE: 2024-05-16_15:28:55 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7287s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7200s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4723s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4656s + [COUNTERS] Fortran MEs ( 1 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1236s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1169s + [COUNTERS] Fortran MEs ( 1 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3902s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2987s - [COUNTERS] Fortran MEs ( 1 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2691s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2009s + [COUNTERS] Fortran MEs ( 1 ) : 0.0683s for 90112 events => throughput is 1.32E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1903s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1836s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1233s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 8192 events => throughput is 1.73E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3825s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3093s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0732s for 90112 events => throughput is 1.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2500s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1993s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0507s for 90112 events => throughput is 1.78E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515590123565249E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.260929e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.788544e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.250210e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.800072e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1813s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.15E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1189s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1169s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.17E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3288s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2997s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0290s for 90112 events => throughput is 3.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2157s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1949s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 90112 events => throughput is 4.34E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587612890761E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.206836e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.345215e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.334282e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.518939e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1835s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1811s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1228s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1210s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.52E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3304s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3034s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 90112 events => throughput is 3.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2201s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2005s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 90112 events => throughput is 4.59E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.522447e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.843944e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.386931e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.040424e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1829s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1222s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1204s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.57E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3301s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3033s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 90112 events => throughput is 3.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2202s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2006s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 90112 events => throughput is 4.60E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.528072e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.884882e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.720927e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.111021e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.21E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1204s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1192s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0012s for 8192 events => throughput is 6.61E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3371s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3080s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0291s for 90112 events => throughput is 3.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2134s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2000s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 90112 events => throughput is 6.70E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515591296252558E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.341186e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.259477e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.598530e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6090s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6085s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.68E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382706077425631E-002) differ by less than 4E-4 (9.988182347875352e-08) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7344s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7297s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.95E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515592892887687E-002) differ by less than 4E-4 (9.973286385633884e-08) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.546893e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.804903e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.477327e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.060127e+09 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.389797e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.251129e+09 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.752691e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.844948e+06 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.481445e+08 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index fc86f120db..fa324ac52a 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_01:26:29 +DATE: 2024-05-16_15:28:59 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7237s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7153s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4705s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4639s + [COUNTERS] Fortran MEs ( 1 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1773s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1234s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1169s + [COUNTERS] Fortran MEs ( 1 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.4045s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3124s - [COUNTERS] Fortran MEs ( 1 ) : 0.0920s for 90112 events => throughput is 9.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2647s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1962s + [COUNTERS] Fortran MEs ( 1 ) : 0.0685s for 90112 events => throughput is 1.32E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1967s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1895s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1218s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 8192 events => throughput is 1.43E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3901s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3110s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0791s for 90112 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2614s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2022s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0592s for 90112 events => throughput is 1.52E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.191141e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.494771e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.200935e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.550621e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1229s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1197s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3486s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3030s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0456s for 90112 events => throughput is 1.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2320s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1991s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 90112 events => throughput is 2.74E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.010123e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.753216e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.071657e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.821401e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1848s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1198s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1174s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.41E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3402s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3039s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0363s for 90112 events => throughput is 2.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2265s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2002s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0263s for 90112 events => throughput is 3.42E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.462700e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.554099e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.639506e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.727720e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1850s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.61E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1205s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1182s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.56E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3431s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3079s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0351s for 90112 events => throughput is 2.56E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2246s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1995s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 90112 events => throughput is 3.60E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.637002e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.764121e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.800572e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.944177e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1865s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1828s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 8192 events => throughput is 2.21E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1198s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1181s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.69E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3487s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3069s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 90112 events => throughput is 2.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2164s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1979s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 90112 events => throughput is 4.86E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.197743e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.022445e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.278261e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6095s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715392009194E-002) differ by less than 2E-4 (1.3548906441229747e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7369s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7319s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.81E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602021089631E-002) differ by less than 2E-4 (1.1898038110302878e-11) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.356139e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.953546e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.960740e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.522141e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.009432e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.090602e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.972046e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.354948e+06 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.157381e+08 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index e1be7813b6..92de97ac10 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx + +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cpp512y -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering 
directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_01:26:45 +DATE: 2024-05-16_15:29:02 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8221s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7787s - [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5258s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4937s + [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4146s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s - [COUNTERS] Fortran MEs ( 1 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2706s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2386s + [COUNTERS] Fortran MEs ( 1 ) : 0.0320s for 8192 events => throughput is 2.56E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7473s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2714s - [COUNTERS] Fortran MEs ( 1 ) : 0.4758s for 90112 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2060s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8546s + [COUNTERS] Fortran MEs ( 1 ) : 0.3515s for 90112 events => throughput is 2.56E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4516s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0396s for 8192 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2934s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2640s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0294s for 8192 events => throughput is 2.79E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.8025s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3670s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4355s for 90112 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2126s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8889s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3237s for 90112 events => throughput is 2.78E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.132783e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.781252e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.139840e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.853290e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4032s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2705s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2539s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0166s for 8192 events => throughput is 4.93E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6069s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3541s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2528s for 90112 events => throughput is 3.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0529s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8705s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1823s for 90112 events => throughput is 4.94E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989106) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.613591e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.903053e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.632391e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.023847e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3994s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3855s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2575s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2474s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 8192 events => throughput is 8.13E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5056s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3497s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1559s for 90112 events => throughput is 5.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9777s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8672s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1104s for 90112 events => throughput is 8.16E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.889737e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.417521e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.921310e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.515777e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3995s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2622s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2525s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 8192 events => throughput is 8.42E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4813s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3413s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1399s for 90112 events => throughput is 6.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9756s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8691s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1065s for 90112 events => throughput is 8.46E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.578026e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.686858e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.673606e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.827717e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4212s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3991s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2551s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2457s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 8192 events => throughput is 8.72E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5933s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3549s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 90112 events => throughput is 3.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9657s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8630s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1027s for 90112 events => throughput is 8.78E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.816986e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.784138e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.814285e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8124s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8118s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.42E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.094184803756640) and cuda (47.094184803756640) differ by less than 3E-14 (0.0) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7642s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7574s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.34E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.105695279989114) and cuda (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.120396e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.622859e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.177398e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary 
= CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.080565e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.172657e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155839e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.173872e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.794801e+05 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.068966e+07 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 0b367d2d96..79930797e3 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_01:27:12 +DATE: 2024-05-16_15:29:12 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8191s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7751s - [COUNTERS] Fortran MEs ( 1 ) : 0.0439s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5279s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4958s + [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4113s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s - [COUNTERS] Fortran MEs ( 1 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2696s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2376s + [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.56E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7478s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2691s - [COUNTERS] Fortran MEs ( 1 ) : 0.4787s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2045s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8527s + [COUNTERS] Fortran MEs ( 1 ) : 0.3517s for 90112 events => throughput is 2.56E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094179780921394] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4476s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4108s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2917s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2649s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 8192 events => throughput is 3.06E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688579298537] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7717s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3659s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4058s for 90112 events => throughput is 2.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.1759s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2948s for 90112 events => throughput is 3.06E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105688579298537) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.257844e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.120974e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.292052e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.107700e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094175850060040] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4035s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3878s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2605s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2487s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0118s for 8192 events => throughput is 6.93E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684763984058] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5166s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3428s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1737s for 90112 events => throughput is 5.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9982s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8679s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1302s for 90112 events => throughput is 6.92E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105684763984058) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.218996e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.809775e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.263655e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.899155e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3915s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2492s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2431s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4300s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3376s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0925s for 90112 events => throughput is 9.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9267s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8595s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0672s for 90112 events => throughput is 1.34E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.896073e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.403362e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.940492e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.409433e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3898s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3819s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2496s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2437s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4168s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3312s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0856s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.9278s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8630s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0648s for 90112 events => throughput is 1.39E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.032975e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.446646e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.050779e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.465351e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094178213275804] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3948s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3837s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 8192 events => throughput is 7.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2459s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2407s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 8192 events => throughput is 1.57E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688407939567] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4646s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3397s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1249s for 90112 events => throughput is 7.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9192s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8622s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0570s for 90112 events => throughput is 1.58E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105688407939567) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.304914e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.607550e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.408593e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184344050284] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8097s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8091s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.50E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.094184803756640) and cuda (47.094184344050284) differ by less than 4E-4 (9.761425112664313e-09) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105694586476879] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7654s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7594s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 90112 events => throughput is 1.51E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.105695279989114) and cuda (47.105694586476879) differ by less than 4E-4 (1.4722471020078842e-08) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.397168e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.912682e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.099083e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary 
= CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.785250e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.065057e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.885291e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.649544e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.622041e+06 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.436840e+07 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 197f6200da..21f98092a2 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_01:27:37 +DATE: 2024-05-16_15:29:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8243s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7810s - [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5266s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4946s + [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4109s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3676s - [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2699s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2378s + [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7536s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2749s - [COUNTERS] Fortran MEs ( 1 ) : 0.4787s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2063s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8547s + [COUNTERS] Fortran MEs ( 1 ) : 0.3516s for 90112 events => throughput is 2.56E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4520s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4121s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2971s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2674s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0297s for 8192 events => throughput is 2.76E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.8139s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3740s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4399s for 90112 events => throughput is 2.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2144s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8891s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3253s for 90112 events => throughput is 2.77E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006634) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.079375e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.772812e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.079476e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.835026e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4154s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3930s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2690s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0163s for 8192 events => throughput is 5.02E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5989s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3477s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2513s for 90112 events => throughput is 3.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0487s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8696s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1791s for 90112 events => throughput is 5.03E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006626) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.535235e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.010731e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.688874e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.141504e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4007s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3868s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2573s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2474s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 8192 events => throughput is 8.24E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5000s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1530s for 90112 events => throughput is 5.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9721s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8635s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1086s for 90112 events => throughput is 8.30E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.781843e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.587881e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.986506e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.652671e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4023s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3898s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2547s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2451s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 8192 events => throughput is 8.54E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4677s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3316s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1361s for 90112 events => throughput is 6.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9755s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8710s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1046s for 90112 events => throughput is 8.62E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.671424e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.860391e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.782154e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.013862e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4150s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3944s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2554s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2462s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.94E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5820s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3527s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2292s for 90112 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8641s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1007s for 90112 events => throughput is 8.95E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.998616e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.885304e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.990048e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8154s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8148s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.094184803756640) and cuda (47.094184798437830) differ by less than 2E-4 (1.1293987967064822e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7703s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7635s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 90112 events => throughput is 1.32E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.105695279989114) and cuda (47.105695279068492) differ by less than 2E-4 (1.954369999168648e-11) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.143723e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.636090e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.182886e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.066867e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.180722e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.146460e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.158610e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.030042e+05 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.030823e+07 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 7f0ff41464..b5560890f7 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg - -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 - -make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + + +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:28:04 +DATE: 2024-05-16_15:29:32 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.7020s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3669s - [COUNTERS] Fortran MEs ( 1 ) : 0.3350s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4931s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2363s + [COUNTERS] Fortran MEs ( 1 ) : 0.2568s for 8192 events => throughput is 3.19E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6627s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3273s - [COUNTERS] Fortran MEs ( 1 ) : 0.3354s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4664s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2101s + [COUNTERS] Fortran MEs ( 1 ) : 0.2562s for 8192 events => throughput is 3.20E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.2831s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5823s - [COUNTERS] Fortran MEs ( 1 ) : 3.7008s for 90112 events => throughput is 2.43E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.8423s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0257s + [COUNTERS] Fortran MEs ( 1 ) : 2.8165s for 90112 events => throughput is 3.20E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 1.0127s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3448s for 8192 events => throughput is 2.38E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7357s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4735s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2622s for 8192 events => throughput is 3.12E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.7090s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9162s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7928s for 90112 events => throughput is 2.38E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.1552s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2739s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8813s for 90112 events => throughput is 3.13E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.444412e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.225002e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.433186e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.233456e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5073s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1796s for 8192 events => throughput is 4.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4751s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3411s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1340s for 8192 events => throughput is 6.11E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.7766s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7859s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9906s for 90112 events => throughput is 4.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6253s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1520s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.4733s for 90112 events => throughput is 6.12E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.630294e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.259323e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.629345e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.287634e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5178s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4262s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 8192 events => throughput is 8.94E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3361s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2728s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0633s for 8192 events => throughput is 1.29E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.7021s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6979s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0042s for 90112 events => throughput is 8.97E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.7948s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0991s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6957s for 90112 events => throughput is 1.30E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.244917e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.325938e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.251295e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.324831e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4129s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0824s for 8192 events => throughput is 9.95E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3290s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2691s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0599s for 8192 events => throughput is 1.37E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5931s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6918s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9014s for 90112 events => throughput is 1.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7437s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0841s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6595s for 90112 events => throughput is 1.37E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.033892e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.401744e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.035720e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.402920e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5700s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4516s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1184s for 8192 events => throughput is 6.92E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.2953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2525s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.0137s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7172s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2965s for 90112 events => throughput is 6.95E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.5450s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0758s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4692s for 90112 events => throughput is 1.92E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.144179e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.927281e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.156532e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7726s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7672s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748607749111) differ by less than 3E-14 (0.0) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0397s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0164s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.87E+06 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481932717736E-002) differ by less than 3E-14 (2.220446049250313e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.642318e+06 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.930638e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.882259e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.244433e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.893041e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.255841e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.907568e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.958513e+05 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.774192e+06 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 1a8c36aa43..0eae6835ae 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:28:48 +DATE: 2024-05-16_15:29:53 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.7057s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3678s - [COUNTERS] Fortran MEs ( 1 ) : 0.3379s for 8192 events => throughput is 2.42E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4912s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2344s + [COUNTERS] Fortran MEs ( 1 ) : 0.2567s for 8192 events => throughput is 3.19E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6650s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3295s - [COUNTERS] Fortran MEs ( 1 ) : 0.3355s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4667s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2105s + [COUNTERS] Fortran MEs ( 1 ) : 0.2563s for 8192 events => throughput is 3.20E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.3113s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5981s - [COUNTERS] Fortran MEs ( 1 ) : 3.7132s for 90112 events => throughput is 2.43E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.8451s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0270s + [COUNTERS] Fortran MEs ( 1 ) : 2.8182s for 90112 events => throughput is 3.20E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112722621426752] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9881s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6537s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3344s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7117s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4568s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2549s for 8192 events => throughput is 3.21E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238468310179624E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.5951s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9082s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6869s for 90112 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.0795s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2753s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8043s for 90112 events => throughput is 3.21E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238468310179624E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.531268e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.321881e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.536337e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.327799e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112720710186394] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5295s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4297s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0998s for 8192 events => throughput is 8.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3616s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2865s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238454786658835E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.7829s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6780s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1050s for 90112 events => throughput is 8.16E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.9233s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0962s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8271s for 90112 events => throughput is 1.09E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238454786658835E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.418674e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.110453e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.378970e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.110952e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4226s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3763s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2773s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2441s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0333s for 8192 events => throughput is 2.46E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.1354s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6236s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5118s for 90112 events => throughput is 1.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4188s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3654s for 90112 events => throughput is 2.47E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.814902e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.540175e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.818224e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.544722e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2721s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2410s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0312s for 8192 events => throughput is 2.63E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0774s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6180s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4593s for 90112 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3955s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3428s for 90112 events => throughput is 2.63E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.016267e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.701778e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.024583e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.701640e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112723387847480] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3831s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0562s for 8192 events => throughput is 1.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2507s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2303s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.02E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238464410949921E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.2433s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6283s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6150s for 90112 events => throughput is 1.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2644s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0403s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2240s for 90112 events => throughput is 4.02E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238464410949921E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.471448e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.017502e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.496104e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112726034625694] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7661s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7652s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.57E+06 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.10112748607749111) and cuda (0.10112726034625694) differ by less than 4E-4 (2.2321452152196386e-06) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238473828077680E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0217s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0116s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 90112 events => throughput is 8.94E+06 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238473828077680E-002) differ by less than 4E-4 (1.0228161673175862e-07) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.279804e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.849139e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.708780e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.376255e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.741880e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.526731e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.576787e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.055236e+05 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.628936e+07 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 06cc385635..a059b7dc3a 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 - -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:29:27 +DATE: 2024-05-16_15:30:11 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.7042s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3668s - [COUNTERS] Fortran MEs ( 1 ) : 0.3374s for 8192 events => throughput is 2.43E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4931s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2367s + [COUNTERS] Fortran MEs ( 1 ) : 0.2564s for 8192 events => throughput is 3.19E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6678s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3296s - [COUNTERS] Fortran MEs ( 1 ) : 0.3383s for 8192 events => throughput is 2.42E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4693s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2130s + [COUNTERS] Fortran MEs ( 1 ) : 0.2562s for 8192 events => throughput is 3.20E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.3035s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5927s - [COUNTERS] Fortran MEs ( 1 ) : 3.7109s for 90112 events => throughput is 2.43E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.8534s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0340s + [COUNTERS] Fortran MEs ( 1 ) : 2.8193s for 90112 events => throughput is 3.20E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 1.0211s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6702s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3509s for 8192 events => throughput is 2.33E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7386s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4711s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2675s for 8192 events => throughput is 3.06E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.7816s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9205s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8611s for 90112 events => throughput is 2.33E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.2166s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2876s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.9290s for 90112 events => throughput is 3.08E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482679400354E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.398467e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.171564e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.396772e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.176804e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6812s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5038s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1774s for 8192 events => throughput is 4.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3391s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1324s for 8192 events => throughput is 6.19E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.6988s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7524s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9464s for 90112 events => throughput is 4.63E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6066s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1524s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.4541s for 90112 events => throughput is 6.20E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482683055667E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.777911e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.373029e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.770421e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.384878e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5041s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4149s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0892s for 8192 events => throughput is 9.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3356s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2723s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0632s for 8192 events => throughput is 1.30E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.6526s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6680s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9846s for 90112 events => throughput is 9.15E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.7794s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0852s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6942s for 90112 events => throughput is 1.30E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.374488e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.328079e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.304457e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.330880e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4867s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4079s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0788s for 8192 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3260s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2671s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0589s for 8192 events => throughput is 1.39E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5251s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6551s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8699s for 90112 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7265s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6473s for 90112 events => throughput is 1.39E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.072957e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.433864e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.074127e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.433043e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5672s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4475s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1197s for 8192 events => throughput is 6.85E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.2991s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2545s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0446s for 8192 events => throughput is 1.84E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.0147s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7041s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3105s for 90112 events => throughput is 6.88E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.5607s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0699s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4908s for 90112 events => throughput is 1.84E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482666076374E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.810756e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.818790e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.935663e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7757s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748601943165) differ by less than 2E-4 (5.74121417074025e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0407s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0176s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 90112 events => throughput is 3.91E+06 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481937154381E-002) differ by less than 2E-4 (5.5991211667105745e-11) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.631069e+06 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.120692e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.856212e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.234939e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.866138e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.243613e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.862499e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.858942e+05 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.731505e+06 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 744dd47e66..41a5914b73 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:30:11 +DATE: 2024-05-16_15:30:32 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.8074s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s - [COUNTERS] Fortran MEs ( 1 ) : 4.4427s for 8192 events => throughput is 1.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6763s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2337s + [COUNTERS] Fortran MEs ( 1 ) : 3.4427s for 8192 events => throughput is 2.38E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.8076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3603s - [COUNTERS] Fortran MEs ( 1 ) : 4.4473s for 8192 events => throughput is 1.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2324s + [COUNTERS] Fortran MEs ( 1 ) : 3.4430s for 8192 events => throughput is 2.38E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 50.3676s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1555s - [COUNTERS] Fortran MEs ( 1 ) : 48.2121s for 90112 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 39.2780s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4056s + [COUNTERS] Fortran MEs ( 1 ) : 37.8724s for 90112 events => throughput is 2.38E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102372E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 9.2196s - [COUNTERS] Fortran Overhead ( 0 ) : 4.7253s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4943s for 8192 events => throughput is 1.82E+03 events/s + [COUNTERS] PROGRAM TOTAL : 6.9383s + [COUNTERS] Fortran Overhead ( 0 ) : 3.5306s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4078s for 8192 events => throughput is 2.40E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451704E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 56.2683s - [COUNTERS] Fortran Overhead ( 0 ) : 6.5201s - [COUNTERS] CudaCpp MEs ( 2 ) : 49.7482s for 90112 events => throughput is 1.81E+03 events/s + [COUNTERS] PROGRAM TOTAL : 42.2142s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6990s + [COUNTERS] CudaCpp MEs ( 2 ) : 37.5151s for 90112 events => throughput is 2.40E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451704E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.868635e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.489722e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.874481e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.491645e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 5.0549s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6552s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3997s for 8192 events => throughput is 3.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.7132s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9362s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.7770s for 8192 events => throughput is 4.61E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451701E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 30.7729s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4162s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.3568s for 90112 events => throughput is 3.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 22.5922s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1055s + [COUNTERS] CudaCpp MEs ( 2 ) : 19.4867s for 90112 events => throughput is 4.62E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451701E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.598914e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.833050e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.606768e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.829430e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.4142s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3728s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0413s for 8192 events => throughput is 7.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.7099s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9557s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7541s for 8192 events => throughput is 1.09E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 14.6025s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1268s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.4757s for 90112 events => throughput is 7.85E+03 events/s + [COUNTERS] PROGRAM TOTAL : 10.4145s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1236s + [COUNTERS] CudaCpp MEs ( 2 ) : 8.2909s for 90112 events => throughput is 1.09E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.082204e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.134734e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.087591e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.133067e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.1594s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9156s for 8192 events => throughput is 8.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.5280s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8707s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6573s for 8192 events => throughput is 1.25E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 13.1097s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0095s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.1002s for 90112 events => throughput is 8.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.2847s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0423s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.2424s for 90112 events => throughput is 1.24E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.157056e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.281427e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.190937e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.280000e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7205s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5345s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1860s for 8192 events => throughput is 6.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.0455s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6333s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4122s for 8192 events => throughput is 1.99E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 16.4021s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3045s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.0976s for 90112 events => throughput is 6.88E+03 events/s + [COUNTERS] PROGRAM TOTAL : 6.3433s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8033s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.5400s for 90112 events => throughput is 1.98E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.943395e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.996762e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.968493e+03 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9037s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8707s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 2.9957s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6318s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3638s for 90112 events => throughput is 2.48E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793438642451712E-004) differ by less than 3E-14 (0.0) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.275863e+05 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.513394e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.126700e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.163753e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.128674e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.183392e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.130320e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.036396e+04 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.455396e+05 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 97726609cd..702017d2bd 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:34:39 +DATE: 2024-05-16_15:33:19 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.7506s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3591s - [COUNTERS] Fortran MEs ( 1 ) : 4.3916s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6781s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2341s + [COUNTERS] Fortran MEs ( 1 ) : 3.4440s for 8192 events => throughput is 2.38E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.7327s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3562s - [COUNTERS] Fortran MEs ( 1 ) : 4.3765s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6729s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2304s + [COUNTERS] Fortran MEs ( 1 ) : 3.4426s for 8192 events => throughput is 2.38E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 50.4568s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1771s - [COUNTERS] Fortran MEs ( 1 ) : 48.2797s for 90112 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 39.2820s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4068s + [COUNTERS] Fortran MEs ( 1 ) : 37.8752s for 90112 events => throughput is 2.38E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703729438336302E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 8.9135s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5714s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3421s for 8192 events => throughput is 1.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 6.7566s + [COUNTERS] Fortran Overhead ( 0 ) : 3.4473s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.3094s for 8192 events => throughput is 2.48E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793486626492658E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 54.1190s - [COUNTERS] Fortran Overhead ( 0 ) : 6.3214s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.7976s for 90112 events => throughput is 1.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 41.0433s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6304s + [COUNTERS] CudaCpp MEs ( 2 ) : 36.4129s for 90112 events => throughput is 2.47E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486626492658E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.947180e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.548798e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.947353e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.549982e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703722581317850E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7184s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5261s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1923s for 8192 events => throughput is 6.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9266s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0707s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8559s for 8192 events => throughput is 9.57E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793483759856148E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 16.5068s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3022s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.2046s for 90112 events => throughput is 6.82E+03 events/s + [COUNTERS] PROGRAM TOTAL : 11.7222s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2357s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4866s for 90112 events => throughput is 9.50E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483759856148E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.978396e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.806237e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.983793e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.796225e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.4122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8788s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5334s for 8192 events => throughput is 1.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9719s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5943s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3776s for 8192 events => throughput is 2.17E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 8.5565s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6677s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.8888s for 90112 events => throughput is 1.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.9221s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7641s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1580s for 90112 events => throughput is 2.17E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.577584e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.266412e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.577489e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.265132e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.2916s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8193s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4723s for 8192 events => throughput is 1.73E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8920s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5583s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3337s for 8192 events => throughput is 2.46E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 7.7633s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5957s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.1676s for 90112 events => throughput is 1.74E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.4023s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7278s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6745s for 90112 events => throughput is 2.45E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.818661e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.522397e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.824534e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.523702e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703728658657426E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.5274s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9418s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5856s for 8192 events => throughput is 1.40E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6316s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4285s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2031s for 8192 events => throughput is 4.03E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793486977281547E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 9.1749s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6987s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.4761s for 90112 events => throughput is 1.39E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.8543s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6032s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2511s for 90112 events => throughput is 4.00E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486977281547E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.413533e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.096014e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.415193e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703736267486325E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.8657s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8443s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.83E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703736267486325E-004) differ by less than 4E-4 (3.1975667371675343e-06) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793489323670813E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 2.8423s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6065s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2358s for 90112 events => throughput is 3.82E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793489323670813E-004) differ by less than 4E-4 (3.20900471706409e-06) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.583101e+05 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.931306e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.570292e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.724382e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.573894e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.720622e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.518798e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.128816e+04 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.538253e+05 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 9161616d22..67d53f2784 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cpp512z 
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:38:10 +DATE: 2024-05-16_15:35:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.7461s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3596s - [COUNTERS] Fortran MEs ( 1 ) : 4.3864s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6775s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2340s + [COUNTERS] Fortran MEs ( 1 ) : 3.4435s for 8192 events => throughput is 2.38E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.7384s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3570s - [COUNTERS] Fortran MEs ( 1 ) : 4.3814s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6718s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2300s + [COUNTERS] Fortran MEs ( 1 ) : 3.4418s for 8192 events => throughput is 2.38E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 50.4226s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1630s - [COUNTERS] Fortran MEs ( 1 ) : 48.2596s for 90112 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 39.2889s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4086s + [COUNTERS] Fortran MEs ( 1 ) : 37.8803s for 90112 events => throughput is 2.38E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612659176674E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 9.4283s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8187s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.6095s for 8192 events => throughput is 1.78E+03 events/s + [COUNTERS] PROGRAM TOTAL : 6.9946s + [COUNTERS] Fortran Overhead ( 0 ) : 3.5542s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4404s for 8192 events => throughput is 2.38E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438704534934E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 56.9207s - [COUNTERS] Fortran Overhead ( 0 ) : 6.6165s - [COUNTERS] CudaCpp MEs ( 2 ) : 50.3042s for 90112 events => throughput is 1.79E+03 events/s + [COUNTERS] PROGRAM TOTAL : 42.6113s + [COUNTERS] Fortran Overhead ( 0 ) : 4.7229s + [COUNTERS] CudaCpp MEs ( 2 ) : 37.8884s for 90112 events => throughput is 2.38E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438704534934E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.845749e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.467157e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.855084e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.466289e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612692816703E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 5.0818s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6854s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3963s for 8192 events => throughput is 3.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.7504s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9597s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.7907s for 8192 events => throughput is 4.57E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438707226035E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 30.7891s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4469s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.3422s for 90112 events => throughput is 3.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 22.8597s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1417s + [COUNTERS] CudaCpp MEs ( 2 ) : 19.7180s for 90112 events => throughput is 4.57E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438707226035E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.522464e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.739127e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.523204e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.736551e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.3962s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3623s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0339s for 8192 events => throughput is 7.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6555s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9393s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7163s for 8192 events => throughput is 1.14E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 14.5036s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1328s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.3708s for 90112 events => throughput is 7.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.9956s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1076s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.8880s for 90112 events => throughput is 1.14E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.125066e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.160709e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.074324e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.163380e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.1502s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2343s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9159s for 8192 events => throughput is 8.94E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.5266s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8679s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6586s for 8192 events => throughput is 1.24E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 12.9975s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0012s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.9962s for 90112 events => throughput is 9.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.2873s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0370s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.2504s for 90112 events => throughput is 1.24E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.351869e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.286158e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.208773e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.287153e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7414s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5394s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2020s for 8192 events => throughput is 6.82E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.0501s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6366s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4135s for 8192 events => throughput is 1.98E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 16.5337s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3003s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.2334s for 90112 events => throughput is 6.81E+03 events/s + [COUNTERS] PROGRAM TOTAL : 6.3641s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8154s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.5487s for 90112 events => throughput is 1.98E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.881370e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.015519e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.943865e+03 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612512203166E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9014s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8685s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 8192 events => throughput is 2.49E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703612512203166E-004) differ by less than 2E-4 (5.427946980773868e-11) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642387717E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 2.9899s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6262s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3637s for 90112 events => throughput is 2.48E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793438642387717E-004) differ by less than 2E-4 (4.051980972974434e-12) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.280457e+05 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.523385e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.121733e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.162091e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.125438e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168234e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.125929e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.984971e+04 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.451563e+05 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index f87c8c9cf1..6444164bd8 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:43:56 +DATE: 2024-05-16_15:38:59 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.7235s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5049s - [COUNTERS] Fortran MEs ( 1 ) : 101.2186s for 8192 events => throughput is 8.09E+01 events/s + [COUNTERS] PROGRAM TOTAL : 84.9678s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3229s + [COUNTERS] Fortran MEs ( 1 ) : 84.6449s for 8192 events => throughput is 9.68E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.7703s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5109s - [COUNTERS] Fortran MEs ( 1 ) : 101.2594s for 8192 events => throughput is 8.09E+01 events/s + [COUNTERS] PROGRAM TOTAL : 85.2157s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3241s + [COUNTERS] Fortran MEs ( 1 ) : 84.8915s for 8192 events => throughput is 9.65E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1119.4272s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4297s - [COUNTERS] Fortran MEs ( 1 ) : 1114.9976s for 90112 events => throughput is 8.08E+01 events/s + [COUNTERS] PROGRAM TOTAL : 934.9385s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8187s + [COUNTERS] Fortran MEs ( 1 ) : 932.1198s for 90112 events => throughput is 9.67E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 236.4243s - [COUNTERS] Fortran Overhead ( 0 ) : 108.7156s - [COUNTERS] CudaCpp MEs ( 2 ) : 127.7087s for 8192 events => throughput is 6.41E+01 events/s + [COUNTERS] PROGRAM TOTAL : 179.3097s + [COUNTERS] Fortran Overhead ( 0 ) : 82.5336s + [COUNTERS] CudaCpp MEs ( 2 ) : 96.7761s for 8192 events => throughput is 8.46E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1507.2346s - [COUNTERS] Fortran Overhead ( 0 ) : 112.2012s - [COUNTERS] CudaCpp MEs ( 2 ) : 1395.0334s for 90112 events => throughput is 6.46E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1149.6533s + [COUNTERS] Fortran Overhead ( 0 ) : 85.0442s + [COUNTERS] CudaCpp MEs ( 2 ) : 1064.6091s for 90112 events => throughput is 8.46E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.611342e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.973924e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.612518e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.984488e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 114.8253s - [COUNTERS] Fortran Overhead ( 0 ) : 52.9129s - [COUNTERS] CudaCpp MEs ( 2 ) : 61.9124s for 8192 events => throughput is 1.32E+02 events/s + [COUNTERS] PROGRAM TOTAL : 93.3071s + [COUNTERS] Fortran Overhead ( 0 ) : 43.4204s + [COUNTERS] CudaCpp MEs ( 2 ) : 49.8867s for 8192 events => throughput is 1.64E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 734.6001s - [COUNTERS] Fortran Overhead ( 0 ) : 56.8950s - [COUNTERS] CudaCpp MEs ( 2 ) : 677.7051s for 90112 events => throughput is 1.33E+02 events/s + [COUNTERS] PROGRAM TOTAL : 596.0037s + [COUNTERS] Fortran Overhead ( 0 ) : 45.9477s + [COUNTERS] CudaCpp MEs ( 2 ) : 550.0560s for 90112 events => throughput is 1.64E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656017E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.573216e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.903688e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.570652e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.904667e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 53.5594s - [COUNTERS] Fortran Overhead ( 0 ) : 24.8692s - [COUNTERS] CudaCpp MEs ( 2 ) : 28.6902s for 8192 events => throughput is 2.86E+02 events/s + [COUNTERS] PROGRAM TOTAL : 42.6389s + [COUNTERS] Fortran Overhead ( 0 ) : 19.5578s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.0811s for 8192 events => throughput is 3.55E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 345.8816s - [COUNTERS] Fortran Overhead ( 0 ) : 28.6165s - [COUNTERS] CudaCpp MEs ( 2 ) : 317.2651s for 90112 events => throughput is 2.84E+02 events/s + [COUNTERS] PROGRAM TOTAL : 275.3646s + [COUNTERS] Fortran Overhead ( 0 ) : 22.0872s + [COUNTERS] CudaCpp MEs ( 2 ) : 253.2773s for 90112 events => throughput is 3.56E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.346027e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.268981e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.397864e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.265198e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 47.5433s - [COUNTERS] Fortran Overhead ( 0 ) : 21.7991s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.7442s for 8192 events => throughput is 3.18E+02 events/s + [COUNTERS] PROGRAM TOTAL : 37.5280s + [COUNTERS] Fortran Overhead ( 0 ) : 16.9907s + [COUNTERS] CudaCpp MEs ( 2 ) : 20.5374s for 8192 events => throughput is 3.99E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 310.2994s - [COUNTERS] Fortran Overhead ( 0 ) : 25.7446s - [COUNTERS] CudaCpp MEs ( 2 ) : 284.5548s for 90112 events => throughput is 3.17E+02 events/s + [COUNTERS] PROGRAM TOTAL : 245.8133s + [COUNTERS] Fortran Overhead ( 0 ) : 19.6011s + [COUNTERS] CudaCpp MEs ( 2 ) : 226.2122s for 90112 events => throughput is 3.98E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.866314e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.923204e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.859864e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.917198e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 50.4926s - [COUNTERS] Fortran Overhead ( 0 ) : 24.7479s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.7447s for 8192 events => throughput is 3.18E+02 events/s + [COUNTERS] PROGRAM TOTAL : 21.3856s + [COUNTERS] Fortran Overhead ( 0 ) : 9.9262s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.4594s for 8192 events => throughput is 7.15E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 313.6701s - [COUNTERS] Fortran Overhead ( 0 ) : 28.5625s - [COUNTERS] CudaCpp MEs ( 2 ) : 285.1075s for 90112 events => throughput is 3.16E+02 events/s + [COUNTERS] PROGRAM TOTAL : 138.4678s + [COUNTERS] Fortran Overhead ( 0 ) : 12.4532s + [COUNTERS] CudaCpp MEs ( 2 ) : 126.0146s for 90112 events => throughput is 7.15E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.394651e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.525681e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.384790e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 4.2708s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1879s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0829s for 8192 events => throughput is 7.56E+03 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 19.1407s - [COUNTERS] Fortran Overhead ( 0 ) : 7.1896s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9511s for 90112 events => throughput is 7.54E+03 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993086656006E-007) differ by less than 3E-14 (1.7763568394002505e-15) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.518899e+03 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.266687e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.285867e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.577065e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.302340e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.485177e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.239249e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.578168e+02 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.236704e+03 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 9938780c0a..3cb851a6bf 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 
BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_03:16:41 +DATE: 2024-05-16_16:49:27 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.6786s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5071s - [COUNTERS] Fortran MEs ( 1 ) : 101.1715s for 8192 events => throughput is 8.10E+01 events/s + [COUNTERS] PROGRAM TOTAL : 84.9806s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3237s + [COUNTERS] Fortran MEs ( 1 ) : 84.6569s for 8192 events => throughput is 9.68E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 102.1420s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5108s - [COUNTERS] Fortran MEs ( 1 ) : 101.6312s for 8192 events => throughput is 8.06E+01 events/s + [COUNTERS] PROGRAM TOTAL : 85.0829s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3251s + [COUNTERS] Fortran MEs ( 1 ) : 84.7578s for 8192 events => throughput is 9.67E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1119.6489s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4200s - [COUNTERS] Fortran MEs ( 1 ) : 1115.2289s for 90112 events => throughput is 8.08E+01 events/s + [COUNTERS] PROGRAM TOTAL : 935.4365s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8214s + [COUNTERS] Fortran MEs ( 1 ) : 932.6152s for 90112 events => throughput is 9.66E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,21 +121,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405719957040752E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405719950940886E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 207.9761s - [COUNTERS] Fortran Overhead ( 0 ) : 95.5518s - [COUNTERS] CudaCpp MEs ( 2 ) : 112.4243s for 8192 events => throughput is 7.29E+01 events/s + [COUNTERS] PROGRAM TOTAL : 172.3492s + [COUNTERS] 
Fortran Overhead ( 0 ) : 79.5418s + [COUNTERS] CudaCpp MEs ( 2 ) : 92.8075s for 8192 events => throughput is 8.83E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719957040752E-006) differ by less than 4E-4 (0.00013985256106807675) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719950940886E-006) differ by less than 4E-4 (0.00013985206930144933) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,35 +155,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326290771198648E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326290797495657E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1342.0233s - [COUNTERS] Fortran Overhead ( 0 ) : 99.5419s - [COUNTERS] CudaCpp MEs ( 2 ) : 1242.4814s for 90112 events => throughput is 7.25E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1103.3837s + [COUNTERS] Fortran Overhead ( 0 ) : 82.1082s + [COUNTERS] CudaCpp MEs ( 2 ) : 1021.2755s for 90112 events => throughput is 8.82E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326290771198648E-007) differ by less than 4E-4 (0.00014139199589124907) +OK! 
xsec from fortran (2.3322993086655967E-007) and cpp (2.3326290797495657E-007) differ by less than 4E-4 (0.0001413931234055532) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.627892e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.035620e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.617246e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.036074e+02 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,7 +199,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -212,9 +207,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405717007921116E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 
events) - [COUNTERS] PROGRAM TOTAL : 52.4912s - [COUNTERS] Fortran Overhead ( 0 ) : 24.8093s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.6819s for 8192 events => throughput is 2.96E+02 events/s + [COUNTERS] PROGRAM TOTAL : 42.4235s + [COUNTERS] Fortran Overhead ( 0 ) : 19.8408s + [COUNTERS] CudaCpp MEs ( 2 ) : 22.5827s for 8192 events => throughput is 3.63E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -238,7 +233,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -246,9 +241,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326284900828787E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 333.5578s - [COUNTERS] Fortran Overhead ( 0 ) : 28.7441s - [COUNTERS] CudaCpp MEs ( 2 ) : 304.8137s for 90112 events => throughput is 2.96E+02 events/s + [COUNTERS] PROGRAM TOTAL : 270.7126s + [COUNTERS] Fortran Overhead ( 0 ) : 22.4048s + [COUNTERS] CudaCpp MEs ( 2 ) : 248.3077s for 90112 events => throughput is 3.63E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -259,14 +254,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326284900828787E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.354565e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.204257e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.352919e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.203793e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -282,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -290,9 +285,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 26.9353s - [COUNTERS] Fortran Overhead ( 0 ) : 12.5805s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.3549s for 8192 events => throughput is 5.71E+02 events/s + [COUNTERS] PROGRAM TOTAL : 21.5813s + [COUNTERS] Fortran Overhead ( 0 ) 
: 10.0100s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.5714s for 8192 events => throughput is 7.08E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -316,7 +311,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -324,9 +319,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 175.5244s - [COUNTERS] Fortran Overhead ( 0 ) : 16.6137s - [COUNTERS] CudaCpp MEs ( 2 ) : 158.9107s for 90112 events => throughput is 5.67E+02 events/s + [COUNTERS] PROGRAM TOTAL : 139.8651s + [COUNTERS] Fortran Overhead ( 0 ) : 12.5495s + [COUNTERS] CudaCpp MEs ( 2 ) : 127.3156s for 90112 events => throughput is 7.08E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -337,14 +332,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.796719e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.487964e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.813418e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.464257e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -368,9 +363,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 24.0035s - [COUNTERS] Fortran Overhead ( 0 ) : 11.1136s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8899s for 8192 events => throughput is 6.36E+02 events/s + [COUNTERS] PROGRAM TOTAL : 19.0340s + [COUNTERS] Fortran Overhead ( 0 ) 
: 8.7393s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.2947s for 8192 events => throughput is 7.96E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,7 +389,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -402,9 +397,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 155.2962s - [COUNTERS] Fortran Overhead ( 0 ) : 15.0900s - [COUNTERS] CudaCpp MEs ( 2 ) : 140.2063s for 90112 events => throughput is 6.43E+02 events/s + [COUNTERS] PROGRAM TOTAL : 124.5246s + [COUNTERS] Fortran Overhead ( 0 ) : 11.2721s + [COUNTERS] CudaCpp MEs ( 2 ) : 113.2525s for 90112 events => throughput is 7.96E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -415,14 +410,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.795563e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.741863e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.781198e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.765601e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,7 +433,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -446,9 +441,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405719306052570E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 25.3167s - [COUNTERS] Fortran Overhead ( 0 ) : 12.6125s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.7041s for 8192 events => throughput is 6.45E+02 events/s + [COUNTERS] PROGRAM TOTAL : 10.9582s + [COUNTERS] Fortran Overhead ( 0 ) 
: 5.1260s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8322s for 8192 events => throughput is 1.40E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -472,7 +467,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -480,9 +475,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326283660088769E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 157.3576s - [COUNTERS] Fortran Overhead ( 0 ) : 16.6484s - [COUNTERS] CudaCpp MEs ( 2 ) : 140.7092s for 90112 events => throughput is 6.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 71.9874s + [COUNTERS] Fortran Overhead ( 0 ) : 7.6592s + [COUNTERS] CudaCpp MEs ( 2 ) : 64.3282s for 90112 events => throughput is 1.40E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -493,121 +488,17 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326283660088769E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.841550e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.700605e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.830427e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405722175509512E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 2.5511s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0591s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4920s for 8192 events => throughput is 1.66E+04 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2405722175509512E-006) differ by less than 4E-4 (0.00014003141235829908) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326296967941821E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 11.4353s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0032s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4321s for 90112 events => throughput is 1.66E+04 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3326296967941821E-007) differ by less than 4E-4 (0.0001416576883412901) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.630624e+04 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.646596e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.329013e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.359221e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.329144e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.339287e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.285838e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.715763e+03 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.423096e+03 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 9cddd5fe7c..8122ec68e8 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering 
directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_04:26:13 +DATE: 2024-05-16_17:44:54 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.9892s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5028s - [COUNTERS] Fortran MEs ( 1 ) : 101.4864s for 8192 events => throughput is 8.07E+01 events/s + [COUNTERS] PROGRAM TOTAL : 84.9287s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3244s + [COUNTERS] Fortran MEs ( 1 ) : 84.6044s for 8192 events => throughput is 9.68E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.7400s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5055s - [COUNTERS] Fortran MEs ( 1 ) : 101.2345s for 8192 events => throughput is 8.09E+01 events/s + [COUNTERS] PROGRAM TOTAL : 84.9670s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3251s + [COUNTERS] Fortran MEs ( 1 ) : 84.6419s for 8192 events => throughput is 9.68E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1119.6356s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4254s - [COUNTERS] Fortran MEs ( 1 ) : 1115.2102s for 90112 events => throughput is 8.08E+01 events/s + [COUNTERS] PROGRAM TOTAL : 934.6879s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8314s + [COUNTERS] Fortran MEs ( 1 ) : 931.8565s for 90112 events => throughput is 9.67E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 229.4020s - [COUNTERS] Fortran Overhead ( 0 ) : 103.2152s - [COUNTERS] CudaCpp MEs ( 2 ) : 126.1868s for 8192 events => throughput is 6.49E+01 events/s + [COUNTERS] PROGRAM TOTAL : 181.1858s + [COUNTERS] Fortran Overhead ( 0 ) : 83.5159s + [COUNTERS] CudaCpp MEs ( 2 ) : 97.6700s for 8192 events => throughput is 8.39E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1528.1049s - [COUNTERS] Fortran Overhead ( 0 ) : 113.9982s - [COUNTERS] CudaCpp MEs ( 2 ) : 1414.1067s for 90112 events => throughput is 6.37E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1160.5951s + [COUNTERS] Fortran Overhead ( 0 ) : 86.1122s + [COUNTERS] CudaCpp MEs ( 2 ) : 1074.4829s for 90112 events => throughput is 8.39E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993212353001E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.425842e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.852275e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.948869e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.858042e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 117.4242s - [COUNTERS] Fortran Overhead ( 0 ) : 53.6967s - [COUNTERS] CudaCpp MEs ( 2 ) : 63.7275s for 8192 events => throughput is 1.29E+02 events/s + [COUNTERS] PROGRAM TOTAL : 90.1394s + [COUNTERS] Fortran Overhead ( 0 ) : 41.5547s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.5846s for 8192 events => throughput is 1.69E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 756.7451s - [COUNTERS] Fortran Overhead ( 0 ) : 57.7650s - [COUNTERS] CudaCpp MEs ( 2 ) : 698.9802s for 90112 events => throughput is 1.29E+02 events/s + [COUNTERS] PROGRAM TOTAL : 578.2572s + [COUNTERS] Fortran Overhead ( 0 ) : 44.1460s + [COUNTERS] CudaCpp MEs ( 2 ) : 534.1112s for 90112 events => throughput is 1.69E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222645653E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.540886e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.989726e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.526888e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.989973e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 51.3666s - [COUNTERS] Fortran Overhead ( 0 ) : 23.6472s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.7194s for 8192 events => throughput is 2.96E+02 events/s + [COUNTERS] PROGRAM TOTAL : 41.0233s + [COUNTERS] Fortran Overhead ( 0 ) : 18.6228s + [COUNTERS] CudaCpp MEs ( 2 ) : 22.4005s for 8192 events => throughput is 3.66E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 334.4015s - [COUNTERS] Fortran Overhead ( 0 ) : 27.7321s - [COUNTERS] CudaCpp MEs ( 2 ) : 306.6693s for 90112 events => throughput is 2.94E+02 events/s + [COUNTERS] PROGRAM TOTAL : 267.4742s + [COUNTERS] Fortran Overhead ( 0 ) : 21.1578s + [COUNTERS] CudaCpp MEs ( 2 ) : 246.3164s for 90112 events => throughput is 3.66E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.517938e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.484454e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.525606e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.480754e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 44.9641s - [COUNTERS] Fortran Overhead ( 0 ) : 20.5328s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.4313s for 8192 events => throughput is 3.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 36.1836s + [COUNTERS] Fortran Overhead ( 0 ) : 16.3046s + [COUNTERS] CudaCpp MEs ( 2 ) : 19.8790s for 8192 events => throughput is 4.12E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 294.2131s - [COUNTERS] Fortran Overhead ( 0 ) : 24.5260s - [COUNTERS] CudaCpp MEs ( 2 ) : 269.6871s for 90112 events => throughput is 3.34E+02 events/s + [COUNTERS] PROGRAM TOTAL : 237.6060s + [COUNTERS] Fortran Overhead ( 0 ) : 18.8810s + [COUNTERS] CudaCpp MEs ( 2 ) : 218.7250s for 90112 events => throughput is 4.12E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.118767e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.135779e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.099496e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.142811e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 49.6272s - [COUNTERS] Fortran Overhead ( 0 ) : 23.9735s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.6537s for 8192 events => throughput is 3.19E+02 events/s + [COUNTERS] PROGRAM TOTAL : 20.6740s + [COUNTERS] Fortran Overhead ( 0 ) : 9.5775s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.0965s for 8192 events => throughput is 7.38E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 308.1385s - [COUNTERS] Fortran Overhead ( 0 ) : 28.1125s - [COUNTERS] CudaCpp MEs ( 2 ) : 280.0260s for 90112 events => throughput is 3.22E+02 events/s + [COUNTERS] PROGRAM TOTAL : 133.9861s + [COUNTERS] Fortran Overhead ( 0 ) : 12.1371s + [COUNTERS] CudaCpp MEs ( 2 ) : 121.8490s for 90112 events => throughput is 7.40E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.494217e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.859816e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.498492e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 3.6127s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7479s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8648s for 8192 events => throughput is 9.47E+03 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985217419736E-006) differ by less than 2E-4 (8.480691704448873e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 16.2177s - [COUNTERS] Fortran Overhead ( 0 ) : 6.7356s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4821s for 90112 events => throughput is 9.50E+03 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993078576733E-007) differ by less than 2E-4 (3.464063480507207e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.422089e+03 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.074505e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108350e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.160591e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110190e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.113742e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.112799e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.872656e+02 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.647292e+03 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index c909267a2d..72e78c5641 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,6 +1,4 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu make USEBUILDDIR=1 BACKEND=cppnone @@ -8,34 +6,31 @@ make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_01:42:38 +DATE: 2024-05-16_15:38:24 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4817s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4067s - [COUNTERS] Fortran MEs ( 1 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3140s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2593s + [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4101s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3363s - [COUNTERS] Fortran MEs ( 1 ) : 0.0738s for 8192 events => throughput is 1.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2697s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2150s + [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3997s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5919s - [COUNTERS] Fortran MEs ( 1 ) : 0.8079s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6275s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0286s + [COUNTERS] Fortran MEs ( 1 ) : 0.5989s for 90112 events => throughput is 1.50E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4922s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4133s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0789s for 8192 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3341s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2745s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0596s for 8192 events => throughput is 1.38E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561293] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.5284s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6490s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8794s for 90112 events => throughput is 1.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7464s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0913s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6551s for 90112 events => throughput is 1.38E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561293) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038604e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.376381e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.042402e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.402553e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351262530] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4203s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3775s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2459s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0320s for 8192 events => throughput is 2.56E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0947s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6149s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4799s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4161s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0639s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3521s for 90112 events => throughput is 2.56E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561281) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.949373e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.521611e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.919896e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.619957e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3846s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3601s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2496s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2320s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0176s for 8192 events => throughput is 4.67E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8777s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6020s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2756s for 90112 events => throughput is 3.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2427s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0499s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1927s for 90112 events => throughput is 4.68E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.197689e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.789149e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.341628e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.838998e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3821s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3591s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2495s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2326s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0169s for 8192 events => throughput is 4.84E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8414s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5939s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2475s for 90112 events => throughput is 3.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2355s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0496s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1859s for 90112 events => throughput is 4.85E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.543777e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.954641e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.667139e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.998895e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4059s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0340s for 8192 events => throughput is 2.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2441s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2295s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.64E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0051s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6219s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3832s for 90112 events => throughput is 2.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2056s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0461s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1595s for 90112 events => throughput is 5.65E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.349902e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.486226e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.363260e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7762s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7756s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.27110539351263330) and cuda (0.27110539351263363) differ by less than 3E-14 (1.3322676295501878e-15) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0116s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0036s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 90112 events => throughput is 1.13E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686556561304) differ by less than 3E-14 (4.440892098500626e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.582112e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.121699e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.529721e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.531629e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.538677e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.807286e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.530045e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.651722e+05 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.783374e+07 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 4ac5ec3dc1..8d06046d46 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + + +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_13:45:22 +DATE: 2024-05-16_15:38:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4107s - [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3140s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2593s + [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4161s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s - [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2704s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2157s + [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4361s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6155s - [COUNTERS] Fortran MEs ( 1 ) : 0.8206s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6306s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0304s + [COUNTERS] Fortran MEs ( 1 ) : 0.6001s for 90112 events => throughput is 1.50E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110463093540638] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4907s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4145s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0762s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3248s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2695s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0553s for 8192 events => throughput is 1.48E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686273216112] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.5059s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6664s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8394s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6926s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0846s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6080s for 90112 events => throughput is 1.48E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686273216112) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.092440e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.483088e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.094603e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.519447e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110459152958460] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3934s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2547s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2348s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0199s for 8192 events => throughput is 4.11E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510683016166510] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9232s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6228s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3004s for 90112 events => throughput is 3.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2695s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2192s for 90112 events => throughput is 4.11E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510683016166510) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.039638e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.996379e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.034015e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.067723e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3713s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2344s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2249s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 8192 events => throughput is 8.60E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7502s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6011s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1490s for 90112 events => throughput is 6.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.1476s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0430s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1046s for 90112 events => throughput is 8.61E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.923571e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.861702e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.853443e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.930775e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3638s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3514s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 8192 events => throughput is 6.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2343s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2251s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.90E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7444s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6060s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1385s for 90112 events => throughput is 6.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.1442s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0433s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1010s for 90112 events => throughput is 8.93E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.334346e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.196341e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.476144e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.299602e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +425,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +433,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110464176080312] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3762s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3583s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0178s for 8192 events => throughput is 4.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2297s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2224s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,7 +459,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -472,9 +467,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510685411522326] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8096s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6129s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1967s for 90112 events => throughput is 4.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.1213s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0407s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 90112 events => throughput is 1.12E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,121 +480,17 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510685411522326) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.750606e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.128056e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.766894e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110478167944563] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7793s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7788s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.48E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.27110539351263330) and cuda (0.27110478167944563) differ by less than 4E-4 (2.2568093527297606e-06) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510689885789414] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0413s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0348s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689885789414) differ by less than 4E-4 (1.547708907700951e-07) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.566939e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.326602e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.593547e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.720103e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.619232e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.806222e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.144615e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.146678e+06 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.016256e+07 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 23f8d1233a..dcca5e7b6d 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_01:43:25 +DATE: 2024-05-16_15:38:47 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s - [COUNTERS] Fortran MEs ( 1 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3141s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2593s + [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4171s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3424s - [COUNTERS] Fortran MEs ( 1 ) : 0.0747s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2729s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2182s + [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4382s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6232s - [COUNTERS] Fortran MEs ( 1 ) : 0.8150s for 90112 events => throughput is 1.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6294s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0299s + [COUNTERS] Fortran MEs ( 1 ) : 0.5995s for 90112 events => throughput is 1.50E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.5038s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4232s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 8192 events => throughput is 1.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3355s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2758s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0597s for 8192 events => throughput is 1.37E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560794337] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.5722s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6823s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8899s for 90112 events => throughput is 1.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7435s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0908s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6527s for 90112 events => throughput is 1.38E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794337) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.030982e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.397909e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.031715e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.408152e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4234s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3811s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2771s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2460s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0311s for 8192 events => throughput is 2.63E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560794334] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.1135s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6453s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4682s for 90112 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4048s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0631s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3417s for 90112 events => throughput is 2.64E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794334) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.925046e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.616672e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.926756e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.648955e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3913s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3660s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 8192 events => throughput is 3.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2495s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2324s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.80E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8962s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6193s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2769s for 90112 events => throughput is 3.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2346s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0474s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1872s for 90112 events => throughput is 4.81E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.266154e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.902593e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.269502e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.938515e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3831s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3610s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2487s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2323s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 8192 events => throughput is 4.99E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8418s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5992s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2426s for 90112 events => throughput is 3.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2294s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0492s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1802s for 90112 events => throughput is 5.00E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.789065e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.088595e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.854777e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.144797e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +425,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +433,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4096s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3737s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0358s for 8192 events => throughput is 2.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2362s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,7 +458,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -471,9 +466,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0275s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6170s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4105s for 90112 events => throughput is 2.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2210s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0523s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1688s for 90112 events => throughput is 5.34E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,121 +479,17 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.335204e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.254596e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.335548e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7724s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.27110539351263330) and cuda (0.27110539343558537) differ by less than 2E-4 (2.8419910869104115e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0091s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0010s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.12E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686553631395) differ by less than 2E-4 (1.3620671257541517e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.632055e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.046951e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.534300e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.533151e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.529948e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.832656e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.532773e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.419540e+05 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.792603e+07 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 7b51bb9221..a8a31556d0 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:58:47 +DATE: 2024-05-16_18:54:45 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9450s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8961s - [COUNTERS] Fortran MEs ( 1 ) : 0.0489s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6024s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5656s + [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4283s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3794s - [COUNTERS] Fortran MEs ( 1 ) : 0.0489s for 8192 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2714s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2346s + [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8593s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3225s - [COUNTERS] Fortran MEs ( 1 ) : 0.5368s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2529s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8482s + [COUNTERS] Fortran MEs ( 1 ) : 0.4047s for 90112 events => throughput is 2.23E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4564s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4132s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3007s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2678s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 8192 events => throughput is 2.49E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8316s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4781s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2402s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8784s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3619s for 90112 events => throughput is 2.49E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.935091e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.532595e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.955688e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.573607e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4208s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3966s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2710s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2529s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0181s for 8192 events => throughput is 4.52E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6048s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3350s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2698s for 90112 events => throughput is 3.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0656s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8658s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1997s for 90112 events => throughput is 4.51E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.396840e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.469892e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.435183e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.578748e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4008s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3863s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2570s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2464s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 8192 events => throughput is 7.75E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5134s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3430s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1705s for 90112 events => throughput is 5.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9758s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8593s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1166s for 90112 events => throughput is 7.73E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.293625e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.020932e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.214602e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.111427e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3999s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2461s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0102s for 8192 events => throughput is 8.03E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4652s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3183s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1469s for 90112 events => throughput is 6.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9695s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8578s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1117s for 90112 events => throughput is 8.07E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.964595e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.234254e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.873725e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.383472e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4159s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3941s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2545s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 8192 events => throughput is 8.42E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5767s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3320s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2446s for 90112 events => throughput is 3.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9639s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8570s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1069s for 90112 events => throughput is 8.43E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.692127e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.279955e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.804832e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8067s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8061s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.36E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955499256165) differ by less than 3E-14 (2.220446049250313e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7545s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7475s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 90112 events => throughput is 1.29E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0434895240377569) and cuda (2.0434895240377573) differ by less than 3E-14 (2.220446049250313e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.804334e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.230995e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.136468e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.810257e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.115441e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.048562e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.115501e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.296965e+05 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.748994e+07 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index d09b81d7d3..664b9cfbfe 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:59:14 +DATE: 2024-05-16_18:54:55 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9433s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8939s - [COUNTERS] Fortran MEs ( 1 ) : 0.0495s for 8192 events => throughput is 1.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6048s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5680s + [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4256s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3772s - [COUNTERS] Fortran MEs ( 1 ) : 0.0484s for 8192 events => throughput is 1.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2729s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2361s + [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8568s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3199s - [COUNTERS] Fortran MEs ( 1 ) : 0.5369s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2537s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8502s + [COUNTERS] Fortran MEs ( 1 ) : 0.4035s for 90112 events => throughput is 2.23E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162897371946169] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1625 events) - [COUNTERS] PROGRAM TOTAL : 0.4525s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4113s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2655s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0305s for 8192 events => throughput is 2.69E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -143,7 +138,7 @@ OK! xsec from fortran (2.0162955499256161) and cpp (2.0162897371946169) differ b *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! -diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 6206,6207c6206,6207 < 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01 0.59936081260E+01 0.00000000000E+00 0. -1. 
< 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304676E+03 0.48080583916E+03 0.47000000000E+01 0. 1. diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 291c38991b..b85cc61a79 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:59:20 +DATE: 2024-05-16_18:54:58 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9570s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9073s - [COUNTERS] Fortran MEs ( 1 ) : 0.0498s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6036s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5668s + [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.22E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 
8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4201s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3715s - [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2727s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2359s + [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8553s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3195s - [COUNTERS] Fortran MEs ( 1 ) : 0.5358s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2509s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8468s + [COUNTERS] Fortran MEs ( 1 ) : 0.4041s for 90112 events => throughput is 2.23E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +121,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +129,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4604s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4161s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0443s for 8192 events => throughput is 1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3085s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2757s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.0328s for 8192 events => throughput is 2.50E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,7 +155,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -168,9 +163,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8327s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4793s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2408s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8801s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3608s for 90112 events => throughput is 2.50E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,15 +177,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.817766e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.385536e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.799752e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.429119e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -206,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,9 +209,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4227s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3979s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0180s for 8192 events => throughput is 4.56E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -240,7 +235,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -248,9 +243,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6020s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3321s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2699s for 90112 events => throughput is 3.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0627s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8648s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1979s for 90112 events => throughput is 4.55E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -262,15 +257,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.208317e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.263258e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.242147e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.354057e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,7 +281,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -294,9 +289,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4018s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2558s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2450s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.56E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -320,7 +315,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -328,9 +323,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4998s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3319s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1679s for 90112 events => throughput is 5.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9781s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8586s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1195s for 90112 events => throughput is 7.54E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -342,15 +337,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.916130e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.855167e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.911254e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.880927e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -366,7 +361,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -374,9 +369,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4012s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3876s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2601s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2496s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0105s for 8192 events => throughput is 7.80E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -400,7 +395,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -408,9 +403,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4747s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3226s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1520s for 90112 events => throughput is 5.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9714s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8562s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1152s for 90112 events => throughput is 7.82E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -422,15 +417,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.307270e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.059394e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.162265e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.102012e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -446,7 +441,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -454,9 +449,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4179s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3948s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2461s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0102s for 8192 events => throughput is 8.00E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -480,7 +475,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -488,9 +483,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5949s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3393s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2556s for 90112 events => throughput is 3.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9690s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8574s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1117s for 90112 events => throughput is 8.07E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -502,122 +497,18 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.332654e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.366098e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.349365e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8124s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8118s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.37E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955503257827) differ by less than 2E-4 (1.9846613241725208e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7514s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7444s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 90112 events => throughput is 1.29E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0434895240377569) and cuda (2.0434895242795732) differ by less than 2E-4 (1.183348974365117e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.815756e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.247313e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.109600e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.657132e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.111219e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.039865e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.111200e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.481957e+05 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.771983e+07 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 80269e77b1..8dfe862e85 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx make USEBUILDDIR=1 BACKEND=cppnone - +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_06:00:18 +DATE: 2024-05-16_18:55:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.6807s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3392s - [COUNTERS] Fortran MEs ( 1 ) : 2.3415s for 8192 events => throughput is 3.50E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9759s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2171s + [COUNTERS] Fortran MEs ( 1 ) : 1.7588s for 8192 events => throughput is 4.66E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.6731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3387s - [COUNTERS] Fortran MEs ( 1 ) : 2.3344s for 8192 events => throughput is 3.51E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9741s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2156s + [COUNTERS] Fortran MEs ( 1 ) : 1.7585s for 8192 events => throughput is 4.66E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 27.7347s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8315s - [COUNTERS] Fortran MEs ( 1 ) : 25.9031s for 90112 events => throughput is 3.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 20.5139s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1701s + [COUNTERS] Fortran MEs ( 1 ) : 19.3438s for 90112 events => throughput is 4.66E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.3198s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7672s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5526s for 8192 events => throughput is 3.21E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.9218s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0315s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8903s for 8192 events => throughput is 4.33E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 31.9550s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2173s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.7377s for 90112 events => throughput is 3.25E+03 events/s + [COUNTERS] PROGRAM TOTAL : 23.7660s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9782s + [COUNTERS] CudaCpp MEs ( 2 ) : 20.7878s for 90112 events => throughput is 4.33E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438187E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.410579e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.538676e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.412797e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.540930e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.9225s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6149s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3076s for 8192 events => throughput is 6.27E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.0838s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9493s for 8192 events => throughput is 8.63E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 17.4719s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0716s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.4004s for 90112 events => throughput is 6.26E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.5399s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0884s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.4514s for 90112 events => throughput is 8.62E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438230E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.486190e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.988476e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.480385e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.934332e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.4971s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9161s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5810s for 8192 events => throughput is 1.41E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0362s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6247s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4115s for 8192 events => throughput is 1.99E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.7948s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3698s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.4250s for 90112 events => throughput is 1.40E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.1052s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5780s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.5273s for 90112 events => throughput is 1.99E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.446474e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.019469e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.453779e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.015794e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.3443s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8339s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5104s for 8192 events => throughput is 1.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9608s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5859s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3750s for 8192 events => throughput is 2.18E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.0840s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3189s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7651s for 90112 events => throughput is 1.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.6614s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5379s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1235s for 90112 events => throughput is 2.19E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.657111e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.238664e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.653778e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.241558e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.7114s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0187s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6926s for 8192 events => throughput is 1.18E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6974s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4560s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2414s for 8192 events => throughput is 3.39E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 10.1275s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5284s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.5992s for 90112 events => throughput is 1.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.0653s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4076s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.6577s for 90112 events => throughput is 3.39E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.225114e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.456123e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.220944e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8326s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8156s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.80E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.4753s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2868s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1885s for 90112 events => throughput is 4.78E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.843300e+05 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.218089e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.155842e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.421655e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.190444e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.415946e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.148629e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.469628e+04 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.764175e+05 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 45b154f6da..f1c86598c9 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_06:02:57 +DATE: 2024-05-16_18:56:56 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.6694s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s - [COUNTERS] Fortran MEs ( 1 ) : 2.3294s for 8192 events => throughput is 3.52E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9756s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2172s + [COUNTERS] Fortran MEs ( 1 ) : 1.7584s for 8192 events => throughput is 4.66E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.6748s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3390s - [COUNTERS] Fortran MEs ( 1 ) : 2.3358s for 8192 events => throughput is 3.51E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9741s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2158s + [COUNTERS] Fortran MEs ( 1 ) : 1.7583s for 8192 events => throughput is 4.66E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 27.4771s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8059s - [COUNTERS] Fortran MEs ( 1 ) : 25.6712s for 90112 events => throughput is 3.51E+03 events/s + [COUNTERS] PROGRAM TOTAL : 20.5198s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1703s + [COUNTERS] Fortran MEs ( 1 ) : 19.3495s for 90112 events => throughput is 4.66E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896785213255034E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.0871s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6795s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4076s for 8192 events => throughput is 3.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.8367s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9909s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8458s for 8192 events => throughput is 4.44E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668138359550833E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 30.6384s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1272s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.5112s for 90112 events => throughput is 3.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 23.2382s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9415s + [COUNTERS] CudaCpp MEs ( 2 ) : 20.2967s for 90112 events => throughput is 4.44E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668138359550833E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.518646e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.625362e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.519140e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.630971e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.6928s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0131s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6797s for 8192 events => throughput is 1.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1744s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6909s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4835s for 8192 events => throughput is 1.69E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.9205s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4645s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.4560s for 90112 events => throughput is 1.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.9719s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6434s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.3285s for 90112 events => throughput is 1.69E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668121906848987E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.232964e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.743379e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.231409e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.737322e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9294s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6306s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2988s for 8192 events => throughput is 2.74E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6365s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4244s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2122s for 8192 events => throughput is 3.86E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.3691s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0900s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.2791s for 90112 events => throughput is 2.75E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.7112s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3768s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3344s for 90112 events => throughput is 3.86E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.797581e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.009425e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.799365e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.005235e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5958s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2624s for 8192 events => throughput is 3.12E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5979s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4052s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1928s for 8192 events => throughput is 4.25E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 4.9443s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0456s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8987s for 90112 events => throughput is 3.11E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4892s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3700s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.1192s for 90112 events => throughput is 4.25E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.232867e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.385408e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.231460e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.384671e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.0285s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6828s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3457s for 8192 events => throughput is 2.37E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4608s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1233s for 8192 events => throughput is 6.64E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.9990s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1683s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8307s for 90112 events => throughput is 2.35E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6549s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2980s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3569s for 90112 events => throughput is 6.64E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668139178203571E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.399591e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.847540e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.392653e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896805369365078E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8136s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.86E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896805369365078E-007) differ by less than 4E-4 (1.3444145174901223e-06) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668194616292154E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.4309s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2769s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1540s for 90112 events => throughput is 5.85E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668194616292154E-007) differ by less than 4E-4 (1.4486452351025747e-06) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.229528e+05 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.512458e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.376887e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.385341e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.353397e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.396782e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.372639e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.892703e+04 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.860961e+05 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 66daeb0e97..53302fc578 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_06:05:05 +DATE: 2024-05-16_18:58:14 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.7059s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3465s - [COUNTERS] Fortran MEs ( 1 ) : 2.3593s for 8192 events => throughput is 3.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9788s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2188s + [COUNTERS] Fortran MEs ( 1 ) : 1.7600s for 8192 events => throughput is 4.65E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.7035s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s - [COUNTERS] Fortran MEs ( 1 ) : 2.3597s for 8192 events => throughput is 3.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9800s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2176s + [COUNTERS] Fortran MEs ( 1 ) : 1.7624s for 8192 events => throughput is 4.65E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 27.5770s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8193s - [COUNTERS] Fortran MEs ( 1 ) : 25.7577s for 90112 events => throughput is 3.50E+03 events/s + [COUNTERS] PROGRAM TOTAL : 20.5105s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1674s + [COUNTERS] Fortran MEs ( 1 ) : 19.3430s for 90112 events => throughput is 4.66E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.3005s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7718s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5286s for 8192 events => throughput is 3.24E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.9498s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0435s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9063s for 8192 events => throughput is 4.30E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -158,7 +153,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +161,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 31.9857s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2191s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.7666s for 90112 events => throughput is 3.25E+03 events/s + [COUNTERS] PROGRAM TOTAL : 23.9601s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0039s + [COUNTERS] CudaCpp MEs ( 2 ) : 20.9562s for 90112 events => throughput is 4.30E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +174,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081976882373E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.386546e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.496946e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.397002e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.495706e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +196,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +204,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.8744s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5934s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2810s for 8192 events => throughput is 6.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.0599s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1247s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9352s for 8192 events => throughput is 8.76E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -234,7 +229,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +237,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 17.1795s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0414s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.1381s for 90112 events => throughput is 6.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.3858s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0854s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.3004s for 90112 events => throughput is 8.75E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +250,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081890954375E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.678262e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.046465e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.743588e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.046622e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +272,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +280,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.4779s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9015s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5764s for 8192 events => throughput is 1.42E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0405s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6241s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4164s for 8192 events => throughput is 1.97E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -310,7 +305,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +313,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.7035s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3540s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.3495s for 90112 events => throughput is 1.42E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.1542s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5762s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.5780s for 90112 events => throughput is 1.97E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,14 +326,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.454724e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.029073e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.476512e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.028411e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -353,7 +348,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -361,9 +356,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.3378s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8314s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5064s for 8192 events => throughput is 1.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9537s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5816s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3721s for 8192 events => throughput is 2.20E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -386,7 +381,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -394,9 +389,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.8877s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3042s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.5835s for 90112 events => throughput is 1.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.6291s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5337s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.0955s for 90112 events => throughput is 2.20E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -407,14 +402,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.679497e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.190729e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.670221e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.259030e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +424,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -437,9 +432,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.7201s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0244s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6957s for 8192 events => throughput is 1.18E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7063s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4622s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2441s for 8192 events => throughput is 3.36E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -462,7 +457,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -470,9 +465,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 10.1095s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4855s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.6240s for 90112 events => throughput is 1.18E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.1083s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4220s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.6863s for 90112 events => throughput is 3.35E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -483,121 +478,17 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.204025e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.407267e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.203510e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8365s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8192s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.75E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697918297644E-007) differ by less than 2E-4 (4.6042958334879813e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.4662s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2768s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1894s for 90112 events => throughput is 4.76E+05 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551547592E-007) differ by less than 2E-4 (1.4264145420384011e-12) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.814879e+05 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.185918e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.154361e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.382253e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.160102e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.387193e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.108981e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.427980e+04 ) sec^-1 -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.750323e+05 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 059122dda6..4c0f9c6dec 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' - make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_06:00:05 +DATE: 2024-05-16_18:55:16 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4148s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4054s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2660s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2596s + [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3154s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3059s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2077s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2013s + [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.27E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.3773s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2771s - [COUNTERS] Fortran MEs ( 1 ) : 0.1002s for 90112 events => throughput is 8.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9314s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8618s + [COUNTERS] Fortran MEs ( 1 ) : 0.0696s for 90112 events => throughput is 1.30E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 01167da954..3960b3117f 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cpp512y -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 - -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_06:00:09 +DATE: 2024-05-16_18:55:18 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4095s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4004s - [COUNTERS] Fortran MEs ( 1 ) : 0.0092s for 8192 events => throughput is 8.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2644s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2580s + [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3199s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3104s - [COUNTERS] Fortran MEs ( 1 ) : 0.0095s for 8192 events => throughput is 8.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2044s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1979s + [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.3992s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2973s - [COUNTERS] Fortran MEs ( 1 ) : 0.1018s for 90112 events => throughput is 8.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9297s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8602s + [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 90112 events => throughput is 1.30E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 6c876298cd..3c9483b9b3 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be 
done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_06:00:14 +DATE: 2024-05-16_18:55:20 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4113s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4020s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2659s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2595s + [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ 
CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3138s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3043s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2046s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1982s + [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.3758s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2758s - [COUNTERS] Fortran MEs ( 1 ) : 0.1001s for 90112 events => throughput is 9.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9294s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8596s + [COUNTERS] Fortran MEs ( 1 ) : 0.0699s for 90112 events => throughput is 1.29E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index fd24a61552..8a45c6ae77 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:59:47 +DATE: 2024-05-16_18:55:08 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8237s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7798s - [COUNTERS] Fortran MEs ( 1 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5278s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4954s + [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 
8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4191s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s - [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2389s + [COUNTERS] Fortran MEs ( 1 ) : 0.0326s for 8192 events => throughput is 2.52E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.8130s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3278s - [COUNTERS] Fortran MEs ( 1 ) : 0.4852s for 90112 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2100s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8554s + [COUNTERS] Fortran MEs ( 1 ) : 0.3547s for 90112 events => throughput is 2.54E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 171.8 [171.81273026311101] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.7007s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0395s for 8192 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4518s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4223s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0294s for 8192 events => throughput is 2.78E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 293718b73f..a5ee8c5b18 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:59:53 +DATE: 2024-05-16_18:55:11 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8342s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7896s - [COUNTERS] Fortran MEs ( 1 ) : 0.0446s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5283s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4959s + [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4178s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3733s - [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2717s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2393s + [COUNTERS] Fortran MEs ( 1 ) : 0.0323s for 8192 events => throughput is 2.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.8125s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3299s - [COUNTERS] Fortran MEs ( 1 ) : 0.4826s for 90112 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2106s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8554s + [COUNTERS] Fortran MEs ( 1 ) : 0.3552s for 90112 events => throughput is 2.54E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 171.8 [171.81270286137041] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.7025s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6657s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0367s for 8192 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4544s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4274s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.04E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index f9ac9cdc3d..4c55397aef 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,36 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:59:59 +DATE: 2024-05-16_18:55:14 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -50,7 +45,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +53,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8401s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7957s - [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5274s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4950s + [COUNTERS] Fortran MEs ( 1 ) : 0.0323s for 8192 events => throughput is 2.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -75,7 +70,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +78,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4268s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3820s - [COUNTERS] Fortran MEs ( 1 ) : 0.0448s for 8192 events => throughput is 1.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2696s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2373s + [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -100,7 +95,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +103,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.8476s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3594s - [COUNTERS] Fortran MEs ( 1 ) : 0.4881s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2119s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8573s + [COUNTERS] Fortran MEs ( 1 ) : 0.3546s for 90112 events => throughput is 2.54E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,7 +120,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/32 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,9 +128,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 171.8 [171.81273490068889] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.7032s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6624s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4538s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4239s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0299s for 8192 events => throughput is 2.74E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** From 8b75ddf39c57d8c4d21c0d47e0f1eb110108aaf6 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 17 May 2024 08:30:01 +0200 Subject: [PATCH 42/48] [jtmk2] go back to itscrd90 logs git checkout 5fec65cc9a8df54bbbb119d22af9fdbe1d33024e tput/logs_* tmad/logs_* --- .../log_eemumu_mad_d_inl0_hrd0.txt | 309 +++++++++++------ .../log_eemumu_mad_f_inl0_hrd0.txt | 313 +++++++++++------ .../log_eemumu_mad_m_inl0_hrd0.txt | 309 +++++++++++------ .../log_ggtt_mad_d_inl0_hrd0.txt | 311 +++++++++++------ .../log_ggtt_mad_f_inl0_hrd0.txt | 309 +++++++++++------ .../log_ggtt_mad_m_inl0_hrd0.txt | 309 +++++++++++------ .../log_ggttg_mad_d_inl0_hrd0.txt | 313 +++++++++++------ .../log_ggttg_mad_f_inl0_hrd0.txt | 309 +++++++++++------ .../log_ggttg_mad_m_inl0_hrd0.txt | 311 +++++++++++------ .../log_ggttgg_mad_d_inl0_hrd0.txt | 309 +++++++++++------ .../log_ggttgg_mad_f_inl0_hrd0.txt | 307 +++++++++++------ .../log_ggttgg_mad_m_inl0_hrd0.txt | 309 +++++++++++------ 
.../log_ggttggg_mad_d_inl0_hrd0.txt | 309 +++++++++++------ .../log_ggttggg_mad_f_inl0_hrd0.txt | 317 ++++++++++++------ .../log_ggttggg_mad_m_inl0_hrd0.txt | 309 +++++++++++------ .../log_gqttq_mad_d_inl0_hrd0.txt | 307 +++++++++++------ .../log_gqttq_mad_f_inl0_hrd0.txt | 313 +++++++++++------ .../log_gqttq_mad_m_inl0_hrd0.txt | 311 +++++++++++------ .../log_heftggbb_mad_d_inl0_hrd0.txt | 309 +++++++++++------ .../log_heftggbb_mad_f_inl0_hrd0.txt | 71 ++-- .../log_heftggbb_mad_m_inl0_hrd0.txt | 309 +++++++++++------ .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 311 +++++++++++------ .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 309 +++++++++++------ .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 309 +++++++++++------ .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 63 ++-- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 65 ++-- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 63 ++-- .../log_susyggtt_mad_d_inl0_hrd0.txt | 69 ++-- .../log_susyggtt_mad_f_inl0_hrd0.txt | 69 ++-- .../log_susyggtt_mad_m_inl0_hrd0.txt | 69 ++-- .../log_eemumu_mad_d_inl0_hrd0.txt | 217 +++++++----- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 225 ++++++++----- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 197 ++++++----- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 202 +++++++---- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 219 +++++++----- .../log_eemumu_mad_d_inl0_hrd1.txt | 217 +++++++----- .../log_eemumu_mad_d_inl1_hrd0.txt | 217 +++++++----- .../log_eemumu_mad_d_inl1_hrd1.txt | 215 +++++++----- .../log_eemumu_mad_f_inl0_hrd0.txt | 221 ++++++------ .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 229 +++++++------ .../log_eemumu_mad_f_inl0_hrd0_common.txt | 201 ++++++----- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 206 ++++++++---- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 223 ++++++------ .../log_eemumu_mad_f_inl0_hrd1.txt | 221 ++++++------ .../log_eemumu_mad_f_inl1_hrd0.txt | 215 +++++++----- .../log_eemumu_mad_f_inl1_hrd1.txt | 215 +++++++----- .../log_eemumu_mad_m_inl0_hrd0.txt | 221 ++++++------ 
.../log_eemumu_mad_m_inl0_hrd1.txt | 223 ++++++------ .../log_ggtt_mad_d_inl0_hrd0.txt | 221 ++++++------ .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 229 +++++++------ .../log_ggtt_mad_d_inl0_hrd0_common.txt | 201 ++++++----- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 206 ++++++++---- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 223 ++++++------ .../log_ggtt_mad_d_inl0_hrd1.txt | 219 ++++++------ .../log_ggtt_mad_d_inl1_hrd0.txt | 215 +++++++----- .../log_ggtt_mad_d_inl1_hrd1.txt | 215 +++++++----- .../log_ggtt_mad_f_inl0_hrd0.txt | 221 ++++++------ .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 229 +++++++------ .../log_ggtt_mad_f_inl0_hrd0_common.txt | 201 ++++++----- .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 206 ++++++++---- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 223 ++++++------ .../log_ggtt_mad_f_inl0_hrd1.txt | 219 ++++++------ .../log_ggtt_mad_f_inl1_hrd0.txt | 215 +++++++----- .../log_ggtt_mad_f_inl1_hrd1.txt | 215 +++++++----- .../log_ggtt_mad_m_inl0_hrd0.txt | 221 ++++++------ .../log_ggtt_mad_m_inl0_hrd1.txt | 221 ++++++------ .../log_ggttg_mad_d_inl0_hrd0.txt | 236 +++++++------ .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 248 ++++++++------ .../log_ggttg_mad_d_inl0_hrd1.txt | 236 +++++++------ .../log_ggttg_mad_f_inl0_hrd0.txt | 236 +++++++------ .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 248 ++++++++------ .../log_ggttg_mad_f_inl0_hrd1.txt | 236 +++++++------ .../log_ggttg_mad_m_inl0_hrd0.txt | 236 +++++++------ .../log_ggttg_mad_m_inl0_hrd1.txt | 238 +++++++------ .../log_ggttgg_mad_d_inl0_hrd0.txt | 236 +++++++------ .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 248 ++++++++------ .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 216 +++++++----- .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 221 ++++++++---- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 239 +++++++------ .../log_ggttgg_mad_d_inl0_hrd1.txt | 236 +++++++------ .../log_ggttgg_mad_d_inl1_hrd0.txt | 232 ++++++++----- .../log_ggttgg_mad_d_inl1_hrd1.txt | 232 ++++++++----- .../log_ggttgg_mad_f_inl0_hrd0.txt | 
236 +++++++------ .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 248 ++++++++------ .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 216 +++++++----- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 221 ++++++++---- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 239 +++++++------ .../log_ggttgg_mad_f_inl0_hrd1.txt | 242 +++++++------ .../log_ggttgg_mad_f_inl1_hrd0.txt | 236 +++++++------ .../log_ggttgg_mad_f_inl1_hrd1.txt | 236 +++++++------ .../log_ggttgg_mad_m_inl0_hrd0.txt | 236 +++++++------ .../log_ggttgg_mad_m_inl0_hrd1.txt | 236 +++++++------ .../log_ggttggg_mad_d_inl0_hrd0.txt | 236 +++++++------ .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 248 ++++++++------ .../log_ggttggg_mad_d_inl0_hrd1.txt | 236 +++++++------ .../log_ggttggg_mad_f_inl0_hrd0.txt | 240 +++++++------ .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 252 ++++++++------ .../log_ggttggg_mad_f_inl0_hrd1.txt | 240 +++++++------ .../log_ggttggg_mad_m_inl0_hrd0.txt | 236 +++++++------ .../log_ggttggg_mad_m_inl0_hrd1.txt | 236 +++++++------ .../log_gqttq_mad_d_inl0_hrd0.txt | 236 +++++++------ .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 248 ++++++++------ .../log_gqttq_mad_d_inl0_hrd1.txt | 240 +++++++------ .../log_gqttq_mad_f_inl0_hrd0.txt | 236 +++++++------ .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 248 ++++++++------ .../log_gqttq_mad_f_inl0_hrd1.txt | 236 +++++++------ .../log_gqttq_mad_m_inl0_hrd0.txt | 236 +++++++------ .../log_gqttq_mad_m_inl0_hrd1.txt | 236 +++++++------ .../log_heftggbb_mad_d_inl0_hrd0.txt | 221 ++++++------ .../log_heftggbb_mad_d_inl0_hrd1.txt | 219 ++++++------ .../log_heftggbb_mad_f_inl0_hrd0.txt | 221 ++++++------ .../log_heftggbb_mad_f_inl0_hrd1.txt | 221 ++++++------ .../log_heftggbb_mad_m_inl0_hrd0.txt | 221 ++++++------ .../log_heftggbb_mad_m_inl0_hrd1.txt | 223 ++++++------ .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 236 +++++++------ .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 236 +++++++------ .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 236 +++++++------ 
.../log_smeftggtttt_mad_f_inl0_hrd1.txt | 236 +++++++------ .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 236 +++++++------ .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 236 +++++++------ .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 219 ++++++------ .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 219 ++++++------ .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 215 +++++++----- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 215 +++++++----- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 217 +++++++----- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 219 ++++++------ .../log_susyggtt_mad_d_inl0_hrd0.txt | 221 ++++++------ .../log_susyggtt_mad_d_inl0_hrd1.txt | 219 ++++++------ .../log_susyggtt_mad_f_inl0_hrd0.txt | 221 ++++++------ .../log_susyggtt_mad_f_inl0_hrd1.txt | 219 ++++++------ .../log_susyggtt_mad_m_inl0_hrd0.txt | 221 ++++++------ .../log_susyggtt_mad_m_inl0_hrd1.txt | 221 ++++++------ 132 files changed, 18804 insertions(+), 11908 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 8cf68c4c9b..41d66d8253 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_15:28:51 +DATE: 2024-05-16_01:25:56 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.4710s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4643s - [COUNTERS] Fortran MEs ( 1 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7231s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7147s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1222s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1157s - [COUNTERS] Fortran MEs ( 1 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1771s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.54E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2636s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1958s - [COUNTERS] Fortran MEs ( 1 ) : 0.0678s for 90112 events => throughput is 1.33E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3949s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3024s + [COUNTERS] Fortran MEs ( 1 ) : 0.0925s for 90112 events => throughput is 9.74E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1255s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1201s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1913s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1844s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 8192 events => throughput is 1.18E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2595s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2015s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0579s for 90112 events => throughput is 1.56E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3835s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0754s for 90112 events => throughput is 1.20E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.541864e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.192081e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.584257e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.204613e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1233s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1864s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1822s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.95E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2334s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0341s for 90112 events => throughput is 2.64E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3602s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3125s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0476s for 90112 events => throughput is 1.89E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.692869e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.953763e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.814428e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.009226e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1211s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1187s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1802s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2261s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1996s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0265s for 90112 events => throughput is 3.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3431s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0366s for 90112 events => throughput is 2.46E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.533549e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.541984e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.712671e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.616899e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1214s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1191s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1841s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1810s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.69E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2242s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1988s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 90112 events => throughput is 3.55E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3388s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3042s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0346s for 90112 events => throughput is 2.61E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.627236e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.662866e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.839363e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.883371e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1199s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1181s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1859s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1818s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.99E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2212s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2016s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 90112 events => throughput is 4.60E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3474s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3044s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0430s for 90112 events => throughput is 2.10E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.651847e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.029340e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.051717e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.231218e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.6140s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6135s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715404661532E-002) differ by less than 3E-14 (0.0) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.7363s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7314s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.86E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.277665e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.916168e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.959957e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.493136e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.970202e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.040191e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.002261e+07 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.140061e+08 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 6cae3f4f6d..c4c8099bbf 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=cppnone - -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cpp512z +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering 
directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_15:28:55 +DATE: 2024-05-16_01:26:13 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.4723s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4656s - [COUNTERS] Fortran MEs ( 1 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7287s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7200s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.42E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1236s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1169s - [COUNTERS] Fortran MEs ( 1 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.57E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2691s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2009s - [COUNTERS] Fortran MEs ( 1 ) : 0.0683s for 90112 events => throughput is 1.32E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3902s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2987s + [COUNTERS] Fortran MEs ( 1 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1233s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1185s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 8192 events => throughput is 1.73E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1903s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1836s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.22E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2500s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0507s for 90112 events => throughput is 1.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3825s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3093s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0732s for 90112 events => throughput is 1.23E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515590123565249E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.788544e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.260929e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.800072e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.250210e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1189s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1169s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.17E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1813s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.15E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2157s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1949s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 90112 events => throughput is 4.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3288s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0290s for 90112 events => throughput is 3.10E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587612890761E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.345215e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.206836e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.518939e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.334282e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1228s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1210s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1835s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1811s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.38E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2201s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2005s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 90112 events => throughput is 4.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3304s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3034s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 90112 events => throughput is 3.34E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.843944e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.522447e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.040424e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.386931e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1222s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1204s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1829s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.41E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2202s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2006s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 90112 events => throughput is 4.60E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3301s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3033s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 90112 events => throughput is 3.37E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.884882e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.528072e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.111021e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.720927e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1204s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1192s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0012s for 8192 events => throughput is 6.61E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.21E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2134s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2000s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 90112 events => throughput is 6.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3371s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0291s for 90112 events => throughput is 3.10E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515591296252558E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.259477e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.341186e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.844948e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.598530e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.6090s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.68E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382706077425631E-002) differ by less than 4E-4 (9.988182347875352e-08) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.7344s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7297s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.95E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515592892887687E-002) differ by less than 4E-4 (9.973286385633884e-08) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.546893e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.804903e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.477327e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.060127e+09 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.389797e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.251129e+09 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.752691e+07 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.481445e+08 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index fa324ac52a..fc86f120db 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering 
directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_15:28:59 +DATE: 2024-05-16_01:26:29 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.4705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4639s - [COUNTERS] Fortran MEs ( 1 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7237s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7153s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1234s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1169s - [COUNTERS] Fortran MEs ( 1 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1861s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1773s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.40E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2647s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1962s - [COUNTERS] Fortran MEs ( 1 ) : 0.0685s for 90112 events => throughput is 1.32E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4045s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3124s + [COUNTERS] Fortran MEs ( 1 ) : 0.0920s for 90112 events => throughput is 9.79E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1218s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 8192 events => throughput is 1.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1967s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1895s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2614s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2022s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0592s for 90112 events => throughput is 1.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3901s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3110s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0791s for 90112 events => throughput is 1.14E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.494771e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.191141e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.550621e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.200935e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1229s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1197s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.98E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2320s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1991s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 90112 events => throughput is 2.74E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3030s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0456s for 90112 events => throughput is 1.98E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.753216e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.010123e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.821401e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.071657e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1198s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1174s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1848s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.49E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2265s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2002s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0263s for 90112 events => throughput is 3.42E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3402s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3039s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0363s for 90112 events => throughput is 2.49E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.554099e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.462700e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.727720e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.639506e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1205s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1182s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.56E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1850s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.61E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2246s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1995s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 90112 events => throughput is 3.60E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3431s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0351s for 90112 events => throughput is 2.56E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.764121e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.637002e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.944177e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.800572e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1198s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1181s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1865s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 8192 events => throughput is 2.21E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2164s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1979s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 90112 events => throughput is 4.86E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3487s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3069s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 90112 events => throughput is 2.16E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.022445e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.197743e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.354948e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.278261e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.6095s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715392009194E-002) differ by less than 2E-4 (1.3548906441229747e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.7369s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7319s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.81E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602021089631E-002) differ by less than 2E-4 (1.1898038110302878e-11) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.356139e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.953546e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.960740e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.522141e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.009432e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.090602e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.972046e+07 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.157381e+08 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 92de97ac10..e1be7813b6 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cppnone +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_15:29:02 +DATE: 2024-05-16_01:26:45 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.5258s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4937s - [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8221s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7787s + [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2706s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2386s - [COUNTERS] Fortran MEs ( 1 ) : 0.0320s for 8192 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4146s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s + [COUNTERS] Fortran MEs ( 1 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.2060s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8546s - [COUNTERS] Fortran MEs ( 1 ) : 0.3515s for 90112 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7473s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2714s + [COUNTERS] Fortran MEs ( 1 ) : 0.4758s for 90112 events => throughput is 1.89E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2934s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2640s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0294s for 8192 events => throughput is 2.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0396s for 8192 events => throughput is 2.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.2126s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8889s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3237s for 90112 events => throughput is 2.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8025s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3670s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4355s for 90112 events => throughput is 2.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.781252e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.132783e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.853290e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.139840e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2539s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0166s for 8192 events => throughput is 4.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4032s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.36E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.0529s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8705s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1823s for 90112 events => throughput is 4.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6069s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3541s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2528s for 90112 events => throughput is 3.56E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989106) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.903053e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.613591e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.023847e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.632391e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2575s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2474s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 8192 events => throughput is 8.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3994s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3855s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.89E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 0.9777s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8672s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1104s for 90112 events => throughput is 8.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5056s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3497s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1559s for 90112 events => throughput is 5.78E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.417521e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.889737e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.515777e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.921310e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2622s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2525s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 8192 events => throughput is 8.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3995s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.55E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 0.9756s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8691s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1065s for 90112 events => throughput is 8.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4813s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3413s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1399s for 90112 events => throughput is 6.44E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.686858e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.578026e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.827717e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.673606e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2551s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2457s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 8192 events => throughput is 8.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4212s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3991s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 0.9657s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8630s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1027s for 90112 events => throughput is 8.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5933s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3549s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 90112 events => throughput is 3.78E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.784138e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.816986e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.794801e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.814285e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.8124s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8118s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.42E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.094184803756640) and cuda (47.094184803756640) differ by less than 3E-14 (0.0) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.7642s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7574s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.34E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cuda (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.120396e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.622859e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.177398e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.080565e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.172657e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.155839e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.173872e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.068966e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 79930797e3..0b367d2d96 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_15:29:12 +DATE: 2024-05-16_01:27:12 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.5279s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4958s - [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8191s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7751s + [COUNTERS] Fortran MEs ( 1 ) : 0.0439s for 8192 events => throughput is 1.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2696s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2376s - [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4113s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s + [COUNTERS] Fortran MEs ( 1 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.2045s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8527s - [COUNTERS] Fortran MEs ( 1 ) : 0.3517s for 90112 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7478s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2691s + [COUNTERS] Fortran MEs ( 1 ) : 0.4787s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094179780921394] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2917s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2649s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 8192 events => throughput is 3.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4476s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4108s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688579298537] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.1759s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2948s for 90112 events => throughput is 3.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7717s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3659s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4058s for 90112 events => throughput is 2.22E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105688579298537) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.120974e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.257844e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.107700e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.292052e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094175850060040] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2605s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0118s for 8192 events => throughput is 6.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4035s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3878s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.23E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684763984058] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 0.9982s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1302s for 90112 events => throughput is 6.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5166s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3428s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1737s for 90112 events => throughput is 5.19E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105684763984058) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.809775e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.218996e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.899155e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.263655e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2492s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2431s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3915s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3833s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 0.9267s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8595s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0672s for 90112 events => throughput is 1.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.4300s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3376s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0925s for 90112 events => throughput is 9.74E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.403362e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.896073e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.409433e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.940492e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2496s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2437s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3898s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 0.9278s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8630s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0648s for 90112 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.4168s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3312s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0856s for 90112 events => throughput is 1.05E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.446646e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.032975e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.465351e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.050779e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094178213275804] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2459s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2407s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 8192 events => throughput is 1.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3948s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3837s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 8192 events => throughput is 7.41E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688407939567] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 0.9192s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8622s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0570s for 90112 events => throughput is 1.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.4646s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3397s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1249s for 90112 events => throughput is 7.22E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105688407939567) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.607550e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.304914e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.622041e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.408593e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094184344050284] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.8097s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8091s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.50E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.094184803756640) and cuda (47.094184344050284) differ by less than 4E-4 (9.761425112664313e-09) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105694586476879] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.7654s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7594s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 90112 events => throughput is 1.51E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cuda (47.105694586476879) differ by less than 4E-4 (1.4722471020078842e-08) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.397168e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.912682e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.099083e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.785250e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.065057e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.885291e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.649544e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.436840e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 21f98092a2..197f6200da 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_15:29:22 +DATE: 2024-05-16_01:27:37 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.5266s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4946s - [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8243s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7810s + [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2699s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2378s - [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4109s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3676s + [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.2063s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8547s - [COUNTERS] Fortran MEs ( 1 ) : 0.3516s for 90112 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7536s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2749s + [COUNTERS] Fortran MEs ( 1 ) : 0.4787s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2971s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2674s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0297s for 8192 events => throughput is 2.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4520s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4121s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.2144s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8891s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3253s for 90112 events => throughput is 2.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8139s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3740s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4399s for 90112 events => throughput is 2.05E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006634) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.772812e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.079375e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.835026e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.079476e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2690s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2527s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0163s for 8192 events => throughput is 5.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4154s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3930s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.67E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.0487s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8696s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1791s for 90112 events => throughput is 5.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5989s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3477s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2513s for 90112 events => throughput is 3.59E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006626) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.010731e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.535235e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.141504e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.688874e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2573s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2474s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 8192 events => throughput is 8.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4007s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 0.9721s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8635s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1086s for 90112 events => throughput is 8.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5000s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3471s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1530s for 90112 events => throughput is 5.89E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.587881e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.781843e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.652671e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.986506e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2547s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2451s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 8192 events => throughput is 8.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4023s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3898s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.55E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 0.9755s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1046s for 90112 events => throughput is 8.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4677s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3316s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1361s for 90112 events => throughput is 6.62E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.860391e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.671424e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.013862e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.782154e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2554s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2462s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4150s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3944s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 0.9648s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8641s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1007s for 90112 events => throughput is 8.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5820s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2292s for 90112 events => throughput is 3.93E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.885304e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.998616e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.030042e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.990048e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.8154s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8148s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.094184803756640) and cuda (47.094184798437830) differ by less than 2E-4 (1.1293987967064822e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.7703s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7635s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 90112 events => throughput is 1.32E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cuda (47.105695279068492) differ by less than 2E-4 (1.954369999168648e-11) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.143723e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.636090e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.182886e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.066867e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.180722e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.146460e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.158610e+07 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.030823e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index b5560890f7..7f0ff41464 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cpp512z +make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_15:29:32 +DATE: 2024-05-16_01:28:04 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.4931s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2363s - [COUNTERS] Fortran MEs ( 1 ) : 0.2568s for 8192 events => throughput is 3.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7020s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3669s + [COUNTERS] Fortran MEs ( 1 ) : 0.3350s for 8192 events => throughput is 2.45E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4664s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2101s - [COUNTERS] Fortran MEs ( 1 ) : 0.2562s for 8192 events => throughput is 3.20E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6627s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3273s + [COUNTERS] Fortran MEs ( 1 ) : 0.3354s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.8423s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0257s - [COUNTERS] Fortran MEs ( 1 ) : 2.8165s for 90112 events => throughput is 3.20E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2831s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5823s + [COUNTERS] Fortran MEs ( 1 ) : 3.7008s for 90112 events => throughput is 2.43E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7357s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4735s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2622s for 8192 events => throughput is 3.12E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6679s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3448s for 8192 events => throughput is 2.38E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 4.1552s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2739s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8813s for 90112 events => throughput is 3.13E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.7090s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9162s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7928s for 90112 events => throughput is 2.38E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.225002e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.444412e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.233456e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.433186e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4751s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3411s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1340s for 8192 events => throughput is 6.11E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1796s for 8192 events => throughput is 4.56E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.6253s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1520s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.4733s for 90112 events => throughput is 6.12E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.7766s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7859s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9906s for 90112 events => throughput is 4.53E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.259323e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.630294e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.287634e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.629345e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3361s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2728s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0633s for 8192 events => throughput is 1.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5178s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4262s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 8192 events => throughput is 8.94E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.7948s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0991s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6957s for 90112 events => throughput is 1.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7021s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6979s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0042s for 90112 events => throughput is 8.97E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.325938e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.244917e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.324831e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.251295e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3290s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2691s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0599s for 8192 events => throughput is 1.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0824s for 8192 events => throughput is 9.95E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.7437s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0841s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6595s for 90112 events => throughput is 1.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5931s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6918s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9014s for 90112 events => throughput is 1.00E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.401744e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.033892e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.402920e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.035720e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.2953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2525s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5700s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4516s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1184s for 8192 events => throughput is 6.92E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.5450s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0758s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4692s for 90112 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 3.0137s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7172s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2965s for 90112 events => throughput is 6.95E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.927281e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.144179e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.958513e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.156532e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.7726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7672s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748607749111) differ by less than 3E-14 (0.0) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.0397s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0164s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.87E+06 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481932717736E-002) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.642318e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.930638e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.882259e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.244433e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.893041e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.255841e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.907568e+06 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.774192e+06 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 0eae6835ae..1a8c36aa43 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering 
directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_15:29:53 +DATE: 2024-05-16_01:28:48 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.4912s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2344s - [COUNTERS] Fortran MEs ( 1 ) : 0.2567s for 8192 events => throughput is 3.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7057s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3678s + [COUNTERS] Fortran MEs ( 1 ) : 0.3379s for 8192 events => throughput is 2.42E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4667s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2105s - [COUNTERS] Fortran MEs ( 1 ) : 0.2563s for 8192 events => throughput is 3.20E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6650s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3295s + [COUNTERS] Fortran MEs ( 1 ) : 0.3355s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.8451s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0270s - [COUNTERS] Fortran MEs ( 1 ) : 2.8182s for 90112 events => throughput is 3.20E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3113s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5981s + [COUNTERS] Fortran MEs ( 1 ) : 3.7132s for 90112 events => throughput is 2.43E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112722621426752] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7117s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4568s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2549s for 8192 events => throughput is 3.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9881s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6537s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3344s for 8192 events => throughput is 2.45E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238468310179624E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 4.0795s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2753s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8043s for 90112 events => throughput is 3.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.5951s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9082s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6869s for 90112 events => throughput is 2.44E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238468310179624E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.321881e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.531268e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.327799e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.536337e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112720710186394] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3616s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5295s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4297s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0998s for 8192 events => throughput is 8.21E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238454786658835E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9233s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0962s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8271s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7829s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6780s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1050s for 90112 events => throughput is 8.16E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238454786658835E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110453e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.418674e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110952e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.378970e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.2773s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2441s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0333s for 8192 events => throughput is 2.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4226s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3763s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 8192 events => throughput is 1.77E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.4188s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3654s for 90112 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1354s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6236s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5118s for 90112 events => throughput is 1.76E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.540175e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.814902e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.544722e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.818224e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.2721s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2410s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0312s for 8192 events => throughput is 2.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.3955s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0527s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3428s for 90112 events => throughput is 2.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0774s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6180s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4593s for 90112 events => throughput is 1.96E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.701778e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.016267e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.701640e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.024583e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112723387847480] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.2507s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2303s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4393s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3831s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0562s for 8192 events => throughput is 1.46E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238464410949921E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.2644s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0403s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2240s for 90112 events => throughput is 4.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2433s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6283s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6150s for 90112 events => throughput is 1.47E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238464410949921E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.017502e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.471448e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.055236e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.496104e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112726034625694] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.7661s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7652s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.57E+06 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.10112748607749111) and cuda (0.10112726034625694) differ by less than 4E-4 (2.2321452152196386e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238473828077680E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.0217s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0116s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 90112 events => throughput is 8.94E+06 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238473828077680E-002) differ by less than 4E-4 (1.0228161673175862e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.279804e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.849139e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.708780e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.376255e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.741880e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.526731e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.576787e+07 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.628936e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index a059b7dc3a..06cc385635 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - -make USEBUILDDIR=1 BACKEND=cpp512z make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_15:30:11 +DATE: 2024-05-16_01:29:27 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.4931s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2367s - [COUNTERS] Fortran MEs ( 1 ) : 0.2564s for 8192 events => throughput is 3.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7042s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3668s + [COUNTERS] Fortran MEs ( 1 ) : 0.3374s for 8192 events => throughput is 2.43E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4693s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2130s - [COUNTERS] Fortran MEs ( 1 ) : 0.2562s for 8192 events => throughput is 3.20E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6678s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3296s + [COUNTERS] Fortran MEs ( 1 ) : 0.3383s for 8192 events => throughput is 2.42E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.8534s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0340s - [COUNTERS] Fortran MEs ( 1 ) : 2.8193s for 90112 events => throughput is 3.20E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3035s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5927s + [COUNTERS] Fortran MEs ( 1 ) : 3.7109s for 90112 events => throughput is 2.43E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7386s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4711s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2675s for 8192 events => throughput is 3.06E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3509s for 8192 events => throughput is 2.33E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 4.2166s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2876s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.9290s for 90112 events => throughput is 3.08E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.7816s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9205s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.8611s for 90112 events => throughput is 2.33E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482679400354E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.171564e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.398467e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.176804e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.396772e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4715s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3391s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1324s for 8192 events => throughput is 6.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6812s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5038s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1774s for 8192 events => throughput is 4.62E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.6066s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1524s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.4541s for 90112 events => throughput is 6.20E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.6988s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7524s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9464s for 90112 events => throughput is 4.63E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482683055667E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.373029e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.777911e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.384878e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.770421e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3356s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2723s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0632s for 8192 events => throughput is 1.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5041s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4149s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0892s for 8192 events => throughput is 9.19E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.7794s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0852s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6942s for 90112 events => throughput is 1.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6526s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6680s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9846s for 90112 events => throughput is 9.15E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.328079e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.374488e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.330880e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.304457e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3260s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2671s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0589s for 8192 events => throughput is 1.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4867s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0788s for 8192 events => throughput is 1.04E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.7265s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0792s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6473s for 90112 events => throughput is 1.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5251s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6551s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8699s for 90112 events => throughput is 1.04E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.433864e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.072957e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.433043e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.074127e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.2991s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2545s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0446s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1197s for 8192 events => throughput is 6.85E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.5607s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0699s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4908s for 90112 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 3.0147s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7041s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3105s for 90112 events => throughput is 6.88E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482666076374E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.818790e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.810756e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.858942e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.935663e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.7757s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7703s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748601943165) differ by less than 2E-4 (5.74121417074025e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.0407s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0176s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 90112 events => throughput is 3.91E+06 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481937154381E-002) differ by less than 2E-4 (5.5991211667105745e-11) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.631069e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.120692e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.856212e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.234939e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.866138e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.243613e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.862499e+06 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.731505e+06 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 41a5914b73..744dd47e66 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_15:30:32 +DATE: 2024-05-16_01:30:11 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 3.6763s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2337s - [COUNTERS] Fortran MEs ( 1 ) : 3.4427s for 8192 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8074s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s + [COUNTERS] Fortran MEs ( 1 ) : 4.4427s for 8192 events => throughput is 1.84E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 
8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 3.6754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2324s - [COUNTERS] Fortran MEs ( 1 ) : 3.4430s for 8192 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3603s + [COUNTERS] Fortran MEs ( 1 ) : 4.4473s for 8192 events => throughput is 1.84E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 39.2780s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4056s - [COUNTERS] Fortran MEs ( 1 ) : 37.8724s for 90112 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 50.3676s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1555s + [COUNTERS] Fortran MEs ( 1 ) : 48.2121s for 90112 events => throughput is 1.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102372E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 6.9383s - [COUNTERS] Fortran Overhead ( 0 ) : 3.5306s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4078s for 8192 events => throughput is 2.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.2196s + [COUNTERS] Fortran Overhead ( 0 ) : 4.7253s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4943s for 8192 events => throughput is 1.82E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451704E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 42.2142s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6990s - [COUNTERS] CudaCpp MEs ( 2 ) : 37.5151s for 90112 events => throughput is 2.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 56.2683s + [COUNTERS] Fortran Overhead ( 0 ) : 6.5201s + [COUNTERS] CudaCpp MEs ( 2 ) : 49.7482s for 90112 events => throughput is 1.81E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451704E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.489722e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.868635e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.491645e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.874481e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 3.7132s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9362s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.7770s for 8192 events => throughput is 4.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0549s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6552s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3997s for 8192 events => throughput is 3.41E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451701E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 22.5922s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1055s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.4867s for 90112 events => throughput is 4.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.7729s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4162s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3568s for 90112 events => throughput is 3.42E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451701E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.833050e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.598914e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.829430e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.606768e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.7099s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9557s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7541s for 8192 events => throughput is 1.09E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4142s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3728s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0413s for 8192 events => throughput is 7.87E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 10.4145s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1236s - [COUNTERS] CudaCpp MEs ( 2 ) : 8.2909s for 90112 events => throughput is 1.09E+04 events/s + [COUNTERS] PROGRAM TOTAL : 14.6025s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1268s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.4757s for 90112 events => throughput is 7.85E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.134734e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.082204e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.133067e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.087591e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.5280s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8707s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6573s for 8192 events => throughput is 1.25E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.1594s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9156s for 8192 events => throughput is 8.95E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 9.2847s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0423s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.2424s for 90112 events => throughput is 1.24E+04 events/s + [COUNTERS] PROGRAM TOTAL : 13.1097s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0095s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.1002s for 90112 events => throughput is 8.92E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.281427e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.157056e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.280000e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.190937e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.0455s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6333s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4122s for 8192 events => throughput is 1.99E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7205s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5345s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1860s for 8192 events => throughput is 6.91E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 6.3433s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8033s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.5400s for 90112 events => throughput is 1.98E+04 events/s + [COUNTERS] PROGRAM TOTAL : 16.4021s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3045s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.0976s for 90112 events => throughput is 6.88E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.996762e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.943395e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.036396e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.968493e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 0.9037s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8707s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 2.9957s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6318s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3638s for 90112 events => throughput is 2.48E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793438642451712E-004) differ by less than 3E-14 (0.0) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.275863e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.513394e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.126700e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.163753e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.128674e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.183392e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.130320e+05 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.455396e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 702017d2bd..97726609cd 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_15:33:19 +DATE: 2024-05-16_01:34:39 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 3.6781s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2341s - [COUNTERS] Fortran MEs ( 1 ) : 3.4440s for 8192 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7506s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3591s + [COUNTERS] Fortran MEs ( 1 ) : 4.3916s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 
8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 3.6729s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2304s - [COUNTERS] Fortran MEs ( 1 ) : 3.4426s for 8192 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3562s + [COUNTERS] Fortran MEs ( 1 ) : 4.3765s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 39.2820s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4068s - [COUNTERS] Fortran MEs ( 1 ) : 37.8752s for 90112 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 50.4568s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1771s + [COUNTERS] Fortran MEs ( 1 ) : 48.2797s for 90112 events => throughput is 1.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703729438336302E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 6.7566s - [COUNTERS] Fortran Overhead ( 0 ) : 3.4473s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.3094s for 8192 events => throughput is 2.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.9135s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5714s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3421s for 8192 events => throughput is 1.89E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793486626492658E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 41.0433s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6304s - [COUNTERS] CudaCpp MEs ( 2 ) : 36.4129s for 90112 events => throughput is 2.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 54.1190s + [COUNTERS] Fortran Overhead ( 0 ) : 6.3214s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.7976s for 90112 events => throughput is 1.89E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486626492658E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.548798e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.947180e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.549982e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.947353e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703722581317850E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.9266s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0707s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8559s for 8192 events => throughput is 9.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7184s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5261s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1923s for 8192 events => throughput is 6.87E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793483759856148E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 11.7222s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2357s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4866s for 90112 events => throughput is 9.50E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.5068s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3022s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.2046s for 90112 events => throughput is 6.82E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483759856148E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.806237e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.978396e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.796225e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.983793e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9719s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5943s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3776s for 8192 events => throughput is 2.17E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5334s for 8192 events => throughput is 1.54E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 5.9221s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7641s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1580s for 90112 events => throughput is 2.17E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.5565s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6677s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8888s for 90112 events => throughput is 1.53E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.266412e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.577584e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.265132e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.577489e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.8920s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5583s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3337s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2916s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8193s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4723s for 8192 events => throughput is 1.73E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 5.4023s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7278s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6745s for 90112 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.7633s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5957s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.1676s for 90112 events => throughput is 1.74E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.522397e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.818661e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.523702e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.824534e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703728658657426E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.6316s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4285s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2031s for 8192 events => throughput is 4.03E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.5274s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9418s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5856s for 8192 events => throughput is 1.40E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793486977281547E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 3.8543s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6032s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2511s for 90112 events => throughput is 4.00E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.1749s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6987s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.4761s for 90112 events => throughput is 1.39E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486977281547E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.096014e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.413533e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.128816e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.415193e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703736267486325E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 0.8657s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8443s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.83E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703736267486325E-004) differ by less than 4E-4 (3.1975667371675343e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793489323670813E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 2.8423s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2358s for 90112 events => throughput is 3.82E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793489323670813E-004) differ by less than 4E-4 (3.20900471706409e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.583101e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.931306e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.570292e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.724382e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.573894e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.720622e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.518798e+05 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.538253e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 67d53f2784..9161616d22 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppsse4 - +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_15:35:36 +DATE: 2024-05-16_01:38:10 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 3.6775s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2340s - [COUNTERS] Fortran MEs ( 1 ) : 3.4435s for 8192 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7461s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3596s + [COUNTERS] Fortran MEs ( 1 ) : 4.3864s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 
8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 3.6718s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2300s - [COUNTERS] Fortran MEs ( 1 ) : 3.4418s for 8192 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7384s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3570s + [COUNTERS] Fortran MEs ( 1 ) : 4.3814s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 39.2889s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4086s - [COUNTERS] Fortran MEs ( 1 ) : 37.8803s for 90112 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 50.4226s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1630s + [COUNTERS] Fortran MEs ( 1 ) : 48.2596s for 90112 events => throughput is 1.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612659176674E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 6.9946s - [COUNTERS] Fortran Overhead ( 0 ) : 3.5542s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4404s for 8192 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.4283s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8187s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.6095s for 8192 events => throughput is 1.78E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438704534934E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 42.6113s - [COUNTERS] Fortran Overhead ( 0 ) : 4.7229s - [COUNTERS] CudaCpp MEs ( 2 ) : 37.8884s for 90112 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 56.9207s + [COUNTERS] Fortran Overhead ( 0 ) : 6.6165s + [COUNTERS] CudaCpp MEs ( 2 ) : 50.3042s for 90112 events => throughput is 1.79E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438704534934E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.467157e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.845749e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.466289e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.855084e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612692816703E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 3.7504s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9597s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.7907s for 8192 events => throughput is 4.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0818s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6854s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3963s for 8192 events => throughput is 3.42E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438707226035E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 22.8597s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1417s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.7180s for 90112 events => throughput is 4.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.7891s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4469s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3422s for 90112 events => throughput is 3.42E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438707226035E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.739127e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.522464e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.736551e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.523204e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.6555s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9393s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7163s for 8192 events => throughput is 1.14E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3962s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3623s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0339s for 8192 events => throughput is 7.92E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 9.9956s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1076s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.8880s for 90112 events => throughput is 1.14E+04 events/s + [COUNTERS] PROGRAM TOTAL : 14.5036s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1328s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.3708s for 90112 events => throughput is 7.92E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.160709e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.125066e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.163380e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.074324e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.5266s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6586s for 8192 events => throughput is 1.24E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.1502s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2343s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9159s for 8192 events => throughput is 8.94E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 9.2873s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0370s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.2504s for 90112 events => throughput is 1.24E+04 events/s + [COUNTERS] PROGRAM TOTAL : 12.9975s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0012s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.9962s for 90112 events => throughput is 9.01E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.286158e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.351869e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.287153e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.208773e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.0501s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6366s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4135s for 8192 events => throughput is 1.98E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7414s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5394s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2020s for 8192 events => throughput is 6.82E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 6.3641s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8154s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.5487s for 90112 events => throughput is 1.98E+04 events/s + [COUNTERS] PROGRAM TOTAL : 16.5337s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3003s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.2334s for 90112 events => throughput is 6.81E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.015519e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.881370e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.984971e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.943865e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703612512203166E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 0.9014s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8685s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 8192 events => throughput is 2.49E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703612512203166E-004) differ by less than 2E-4 (5.427946980773868e-11) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793438642387717E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 2.9899s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6262s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3637s for 90112 events => throughput is 2.48E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793438642387717E-004) differ by less than 2E-4 (4.051980972974434e-12) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.280457e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.523385e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.121733e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.162091e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.125438e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.168234e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.125929e+05 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.451563e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 6444164bd8..f87c8c9cf1 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering 
directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_15:38:59 +DATE: 2024-05-16_01:43:56 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 84.9678s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3229s - [COUNTERS] Fortran MEs ( 1 ) : 84.6449s for 8192 events => throughput is 9.68E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.7235s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5049s + [COUNTERS] Fortran MEs ( 1 ) : 101.2186s for 8192 events => throughput is 8.09E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 85.2157s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3241s - [COUNTERS] Fortran MEs ( 1 ) : 84.8915s for 8192 events => throughput is 9.65E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.7703s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5109s + [COUNTERS] Fortran MEs ( 1 ) : 101.2594s for 8192 events => throughput is 8.09E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 934.9385s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8187s - [COUNTERS] Fortran MEs ( 1 ) : 932.1198s for 90112 events => throughput is 9.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1119.4272s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4297s + [COUNTERS] Fortran MEs ( 1 ) : 1114.9976s for 90112 events => throughput is 8.08E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 179.3097s - [COUNTERS] Fortran Overhead ( 0 ) : 82.5336s - [COUNTERS] CudaCpp MEs ( 2 ) : 96.7761s for 8192 events => throughput is 8.46E+01 events/s + [COUNTERS] PROGRAM TOTAL : 236.4243s + [COUNTERS] Fortran Overhead ( 0 ) : 108.7156s + [COUNTERS] CudaCpp MEs ( 2 ) : 127.7087s for 8192 events => throughput is 6.41E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1149.6533s - [COUNTERS] Fortran Overhead ( 0 ) : 85.0442s - [COUNTERS] CudaCpp MEs ( 2 ) : 1064.6091s for 90112 events => throughput is 8.46E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1507.2346s + [COUNTERS] Fortran Overhead ( 0 ) : 112.2012s + [COUNTERS] CudaCpp MEs ( 2 ) : 1395.0334s for 90112 events => throughput is 6.46E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.973924e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.611342e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.984488e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.612518e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 93.3071s - [COUNTERS] Fortran Overhead ( 0 ) : 43.4204s - [COUNTERS] CudaCpp MEs ( 2 ) : 49.8867s for 8192 events => throughput is 1.64E+02 events/s + [COUNTERS] PROGRAM TOTAL : 114.8253s + [COUNTERS] Fortran Overhead ( 0 ) : 52.9129s + [COUNTERS] CudaCpp MEs ( 2 ) : 61.9124s for 8192 events => throughput is 1.32E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 596.0037s - [COUNTERS] Fortran Overhead ( 0 ) : 45.9477s - [COUNTERS] CudaCpp MEs ( 2 ) : 550.0560s for 90112 events => throughput is 1.64E+02 events/s + [COUNTERS] PROGRAM TOTAL : 734.6001s + [COUNTERS] Fortran Overhead ( 0 ) : 56.8950s + [COUNTERS] CudaCpp MEs ( 2 ) : 677.7051s for 90112 events => throughput is 1.33E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656017E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.903688e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.573216e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.904667e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.570652e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 42.6389s - [COUNTERS] Fortran Overhead ( 0 ) : 19.5578s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.0811s for 8192 events => throughput is 3.55E+02 events/s + [COUNTERS] PROGRAM TOTAL : 53.5594s + [COUNTERS] Fortran Overhead ( 0 ) : 24.8692s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.6902s for 8192 events => throughput is 2.86E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 275.3646s - [COUNTERS] Fortran Overhead ( 0 ) : 22.0872s - [COUNTERS] CudaCpp MEs ( 2 ) : 253.2773s for 90112 events => throughput is 3.56E+02 events/s + [COUNTERS] PROGRAM TOTAL : 345.8816s + [COUNTERS] Fortran Overhead ( 0 ) : 28.6165s + [COUNTERS] CudaCpp MEs ( 2 ) : 317.2651s for 90112 events => throughput is 2.84E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.268981e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.346027e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.265198e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.397864e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 37.5280s - [COUNTERS] Fortran Overhead ( 0 ) : 16.9907s - [COUNTERS] CudaCpp MEs ( 2 ) : 20.5374s for 8192 events => throughput is 3.99E+02 events/s + [COUNTERS] PROGRAM TOTAL : 47.5433s + [COUNTERS] Fortran Overhead ( 0 ) : 21.7991s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.7442s for 8192 events => throughput is 3.18E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 245.8133s - [COUNTERS] Fortran Overhead ( 0 ) : 19.6011s - [COUNTERS] CudaCpp MEs ( 2 ) : 226.2122s for 90112 events => throughput is 3.98E+02 events/s + [COUNTERS] PROGRAM TOTAL : 310.2994s + [COUNTERS] Fortran Overhead ( 0 ) : 25.7446s + [COUNTERS] CudaCpp MEs ( 2 ) : 284.5548s for 90112 events => throughput is 3.17E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.923204e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.866314e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.917198e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.859864e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 21.3856s - [COUNTERS] Fortran Overhead ( 0 ) : 9.9262s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.4594s for 8192 events => throughput is 7.15E+02 events/s + [COUNTERS] PROGRAM TOTAL : 50.4926s + [COUNTERS] Fortran Overhead ( 0 ) : 24.7479s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.7447s for 8192 events => throughput is 3.18E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 138.4678s - [COUNTERS] Fortran Overhead ( 0 ) : 12.4532s - [COUNTERS] CudaCpp MEs ( 2 ) : 126.0146s for 90112 events => throughput is 7.15E+02 events/s + [COUNTERS] PROGRAM TOTAL : 313.6701s + [COUNTERS] Fortran Overhead ( 0 ) : 28.5625s + [COUNTERS] CudaCpp MEs ( 2 ) : 285.1075s for 90112 events => throughput is 3.16E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.525681e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.394651e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.578168e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.384790e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 4.2708s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1879s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0829s for 8192 events => throughput is 7.56E+03 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 19.1407s + [COUNTERS] Fortran Overhead ( 0 ) : 7.1896s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9511s for 90112 events => throughput is 7.54E+03 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993086656006E-007) differ by less than 3E-14 (1.7763568394002505e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.518899e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.266687e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.285867e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.577065e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.302340e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.485177e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.239249e+03 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.236704e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 3cb851a6bf..9938780c0a 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering 
directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_16:49:27 +DATE: 2024-05-16_03:16:41 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 84.9806s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3237s - [COUNTERS] Fortran MEs ( 1 ) : 84.6569s for 8192 events => throughput is 9.68E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.6786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5071s + [COUNTERS] Fortran MEs ( 1 ) : 101.1715s for 8192 events => throughput is 8.10E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 85.0829s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3251s - [COUNTERS] Fortran MEs ( 1 ) : 84.7578s for 8192 events => throughput is 9.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.1420s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5108s + [COUNTERS] Fortran MEs ( 1 ) : 101.6312s for 8192 events => throughput is 8.06E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 935.4365s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8214s - [COUNTERS] Fortran MEs ( 1 ) : 932.6152s for 90112 events => throughput is 9.66E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1119.6489s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4200s + [COUNTERS] Fortran MEs ( 1 ) : 1115.2289s for 90112 events => throughput is 8.08E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,21 +126,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405719950940886E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405719957040752E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 172.3492s - [COUNTERS] Fortran Overhead ( 0 ) : 79.5418s - [COUNTERS] CudaCpp MEs ( 2 ) : 92.8075s for 8192 events => throughput is 8.83E+01 events/s + [COUNTERS] PROGRAM TOTAL : 207.9761s + [COUNTERS] 
Fortran Overhead ( 0 ) : 95.5518s + [COUNTERS] CudaCpp MEs ( 2 ) : 112.4243s for 8192 events => throughput is 7.29E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719950940886E-006) differ by less than 4E-4 (0.00013985206930144933) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719957040752E-006) differ by less than 4E-4 (0.00013985256106807675) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -155,35 +160,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326290797495657E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326290771198648E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1103.3837s - [COUNTERS] Fortran Overhead ( 0 ) : 82.1082s - [COUNTERS] CudaCpp MEs ( 2 ) : 1021.2755s for 90112 events => throughput is 8.82E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1342.0233s + [COUNTERS] Fortran Overhead ( 0 ) : 99.5419s + [COUNTERS] CudaCpp MEs ( 2 ) : 1242.4814s for 90112 events => throughput is 7.25E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326290797495657E-007) differ by less than 4E-4 (0.0001413931234055532) +OK! 
xsec from fortran (2.3322993086655967E-007) and cpp (2.3326290771198648E-007) differ by less than 4E-4 (0.00014139199589124907) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.035620e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.627892e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.036074e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.617246e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -199,7 +204,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -207,9 +212,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405717007921116E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 
events) - [COUNTERS] PROGRAM TOTAL : 42.4235s - [COUNTERS] Fortran Overhead ( 0 ) : 19.8408s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.5827s for 8192 events => throughput is 3.63E+02 events/s + [COUNTERS] PROGRAM TOTAL : 52.4912s + [COUNTERS] Fortran Overhead ( 0 ) : 24.8093s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.6819s for 8192 events => throughput is 2.96E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -233,7 +238,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -241,9 +246,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326284900828787E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 270.7126s - [COUNTERS] Fortran Overhead ( 0 ) : 22.4048s - [COUNTERS] CudaCpp MEs ( 2 ) : 248.3077s for 90112 events => throughput is 3.63E+02 events/s + [COUNTERS] PROGRAM TOTAL : 333.5578s + [COUNTERS] Fortran Overhead ( 0 ) : 28.7441s + [COUNTERS] CudaCpp MEs ( 2 ) : 304.8137s for 90112 events => throughput is 2.96E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -254,14 +259,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326284900828787E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.204257e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.354565e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.203793e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.352919e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -277,7 +282,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,9 +290,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 21.5813s - [COUNTERS] Fortran Overhead ( 0 ) : 10.0100s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.5714s for 8192 events => throughput is 7.08E+02 events/s + [COUNTERS] PROGRAM TOTAL : 26.9353s + [COUNTERS] Fortran Overhead ( 0 ) 
: 12.5805s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.3549s for 8192 events => throughput is 5.71E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,7 +316,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -319,9 +324,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 139.8651s - [COUNTERS] Fortran Overhead ( 0 ) : 12.5495s - [COUNTERS] CudaCpp MEs ( 2 ) : 127.3156s for 90112 events => throughput is 7.08E+02 events/s + [COUNTERS] PROGRAM TOTAL : 175.5244s + [COUNTERS] Fortran Overhead ( 0 ) : 16.6137s + [COUNTERS] CudaCpp MEs ( 2 ) : 158.9107s for 90112 events => throughput is 5.67E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +337,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.487964e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.796719e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.464257e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.813418e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +360,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +368,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 19.0340s - [COUNTERS] Fortran Overhead ( 0 ) : 8.7393s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.2947s for 8192 events => throughput is 7.96E+02 events/s + [COUNTERS] PROGRAM TOTAL : 24.0035s + [COUNTERS] Fortran Overhead ( 0 ) : 
11.1136s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.8899s for 8192 events => throughput is 6.36E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -389,7 +394,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -397,9 +402,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 124.5246s - [COUNTERS] Fortran Overhead ( 0 ) : 11.2721s - [COUNTERS] CudaCpp MEs ( 2 ) : 113.2525s for 90112 events => throughput is 7.96E+02 events/s + [COUNTERS] PROGRAM TOTAL : 155.2962s + [COUNTERS] Fortran Overhead ( 0 ) : 15.0900s + [COUNTERS] CudaCpp MEs ( 2 ) : 140.2063s for 90112 events => throughput is 6.43E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,14 +415,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.741863e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.795563e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.765601e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.781198e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -433,7 +438,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -441,9 +446,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405719306052570E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 10.9582s - [COUNTERS] Fortran Overhead ( 0 ) : 5.1260s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.8322s for 8192 events => throughput is 1.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.3167s + [COUNTERS] Fortran Overhead ( 0 ) : 
12.6125s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.7041s for 8192 events => throughput is 6.45E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -467,7 +472,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -475,9 +480,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326283660088769E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 71.9874s - [COUNTERS] Fortran Overhead ( 0 ) : 7.6592s - [COUNTERS] CudaCpp MEs ( 2 ) : 64.3282s for 90112 events => throughput is 1.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 157.3576s + [COUNTERS] Fortran Overhead ( 0 ) : 16.6484s + [COUNTERS] CudaCpp MEs ( 2 ) : 140.7092s for 90112 events => throughput is 6.40E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -488,17 +493,121 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326283660088769E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.700605e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.841550e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.715763e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.830427e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.241e-06 [1.2405722175509512E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 2.5511s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0591s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4920s for 8192 events => throughput is 1.66E+04 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2405722175509512E-006) differ by less than 4E-4 (0.00014003141235829908) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.333e-07 [2.3326296967941821E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 11.4353s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0032s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4321s for 90112 events => throughput is 1.66E+04 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3326296967941821E-007) differ by less than 4E-4 (0.0001416576883412901) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.630624e+04 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.646596e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.329013e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.359221e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.329144e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.339287e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.285838e+04 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.423096e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 8122ec68e8..9cddd5fe7c 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_17:44:54 +DATE: 2024-05-16_04:26:13 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 84.9287s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3244s - [COUNTERS] Fortran MEs ( 1 ) : 84.6044s for 8192 events => throughput is 9.68E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.9892s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5028s + [COUNTERS] Fortran MEs ( 1 ) : 101.4864s for 8192 events => throughput is 8.07E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 84.9670s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3251s - [COUNTERS] Fortran MEs ( 1 ) : 84.6419s for 8192 events => throughput is 9.68E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.7400s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5055s + [COUNTERS] Fortran MEs ( 1 ) : 101.2345s for 8192 events => throughput is 8.09E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 934.6879s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8314s - [COUNTERS] Fortran MEs ( 1 ) : 931.8565s for 90112 events => throughput is 9.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1119.6356s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4254s + [COUNTERS] Fortran MEs ( 1 ) : 1115.2102s for 90112 events => throughput is 8.08E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 181.1858s - [COUNTERS] Fortran Overhead ( 0 ) : 83.5159s - [COUNTERS] CudaCpp MEs ( 2 ) : 97.6700s for 8192 events => throughput is 8.39E+01 events/s + [COUNTERS] PROGRAM TOTAL : 229.4020s + [COUNTERS] Fortran Overhead ( 0 ) : 103.2152s + [COUNTERS] CudaCpp MEs ( 2 ) : 126.1868s for 8192 events => throughput is 6.49E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1160.5951s - [COUNTERS] Fortran Overhead ( 0 ) : 86.1122s - [COUNTERS] CudaCpp MEs ( 2 ) : 1074.4829s for 90112 events => throughput is 8.39E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1528.1049s + [COUNTERS] Fortran Overhead ( 0 ) : 113.9982s + [COUNTERS] CudaCpp MEs ( 2 ) : 1414.1067s for 90112 events => throughput is 6.37E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993212353001E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.852275e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.425842e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.858042e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.948869e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 90.1394s - [COUNTERS] Fortran Overhead ( 0 ) : 41.5547s - [COUNTERS] CudaCpp MEs ( 2 ) : 48.5846s for 8192 events => throughput is 1.69E+02 events/s + [COUNTERS] PROGRAM TOTAL : 117.4242s + [COUNTERS] Fortran Overhead ( 0 ) : 53.6967s + [COUNTERS] CudaCpp MEs ( 2 ) : 63.7275s for 8192 events => throughput is 1.29E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 578.2572s - [COUNTERS] Fortran Overhead ( 0 ) : 44.1460s - [COUNTERS] CudaCpp MEs ( 2 ) : 534.1112s for 90112 events => throughput is 1.69E+02 events/s + [COUNTERS] PROGRAM TOTAL : 756.7451s + [COUNTERS] Fortran Overhead ( 0 ) : 57.7650s + [COUNTERS] CudaCpp MEs ( 2 ) : 698.9802s for 90112 events => throughput is 1.29E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222645653E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.989726e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.540886e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.989973e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.526888e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 41.0233s - [COUNTERS] Fortran Overhead ( 0 ) : 18.6228s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.4005s for 8192 events => throughput is 3.66E+02 events/s + [COUNTERS] PROGRAM TOTAL : 51.3666s + [COUNTERS] Fortran Overhead ( 0 ) : 23.6472s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7194s for 8192 events => throughput is 2.96E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 267.4742s - [COUNTERS] Fortran Overhead ( 0 ) : 21.1578s - [COUNTERS] CudaCpp MEs ( 2 ) : 246.3164s for 90112 events => throughput is 3.66E+02 events/s + [COUNTERS] PROGRAM TOTAL : 334.4015s + [COUNTERS] Fortran Overhead ( 0 ) : 27.7321s + [COUNTERS] CudaCpp MEs ( 2 ) : 306.6693s for 90112 events => throughput is 2.94E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.484454e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.517938e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.480754e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.525606e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 36.1836s - [COUNTERS] Fortran Overhead ( 0 ) : 16.3046s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.8790s for 8192 events => throughput is 4.12E+02 events/s + [COUNTERS] PROGRAM TOTAL : 44.9641s + [COUNTERS] Fortran Overhead ( 0 ) : 20.5328s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.4313s for 8192 events => throughput is 3.35E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 237.6060s - [COUNTERS] Fortran Overhead ( 0 ) : 18.8810s - [COUNTERS] CudaCpp MEs ( 2 ) : 218.7250s for 90112 events => throughput is 4.12E+02 events/s + [COUNTERS] PROGRAM TOTAL : 294.2131s + [COUNTERS] Fortran Overhead ( 0 ) : 24.5260s + [COUNTERS] CudaCpp MEs ( 2 ) : 269.6871s for 90112 events => throughput is 3.34E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.135779e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.118767e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.142811e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.099496e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 20.6740s - [COUNTERS] Fortran Overhead ( 0 ) : 9.5775s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.0965s for 8192 events => throughput is 7.38E+02 events/s + [COUNTERS] PROGRAM TOTAL : 49.6272s + [COUNTERS] Fortran Overhead ( 0 ) : 23.9735s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.6537s for 8192 events => throughput is 3.19E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 133.9861s - [COUNTERS] Fortran Overhead ( 0 ) : 12.1371s - [COUNTERS] CudaCpp MEs ( 2 ) : 121.8490s for 90112 events => throughput is 7.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 308.1385s + [COUNTERS] Fortran Overhead ( 0 ) : 28.1125s + [COUNTERS] CudaCpp MEs ( 2 ) : 280.0260s for 90112 events => throughput is 3.22E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.859816e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.494217e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.872656e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.498492e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 3.6127s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7479s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8648s for 8192 events => throughput is 9.47E+03 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985217419736E-006) differ by less than 2E-4 (8.480691704448873e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 16.2177s + [COUNTERS] Fortran Overhead ( 0 ) : 6.7356s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4821s for 90112 events => throughput is 9.50E+03 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993078576733E-007) differ by less than 2E-4 (3.464063480507207e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.422089e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.074505e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.108350e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.160591e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.110190e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.113742e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.112799e+04 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.647292e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 72e78c5641..c909267a2d 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,4 +1,6 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone @@ -6,31 +8,34 @@ make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_15:38:24 +DATE: 2024-05-16_01:42:38 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.3140s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2593s - [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4817s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4067s + [COUNTERS] Fortran MEs ( 1 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2697s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2150s - [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4101s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3363s + [COUNTERS] Fortran MEs ( 1 ) : 0.0738s for 8192 events => throughput is 1.11E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6275s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0286s - [COUNTERS] Fortran MEs ( 1 ) : 0.5989s for 90112 events => throughput is 1.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3997s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5919s + [COUNTERS] Fortran MEs ( 1 ) : 0.8079s for 90112 events => throughput is 1.12E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3341s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2745s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0596s for 8192 events => throughput is 1.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4922s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4133s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0789s for 8192 events => throughput is 1.04E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561293] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7464s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0913s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6551s for 90112 events => throughput is 1.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5284s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6490s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8794s for 90112 events => throughput is 1.02E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561293) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.376381e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.038604e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.402553e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.042402e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351262530] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2779s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2459s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0320s for 8192 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3775s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.4161s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0639s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3521s for 90112 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0947s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6149s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4799s for 90112 events => throughput is 1.88E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561281) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.521611e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.949373e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.619957e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.919896e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2496s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2320s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0176s for 8192 events => throughput is 4.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3846s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3601s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.35E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.2427s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0499s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1927s for 90112 events => throughput is 4.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8777s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6020s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2756s for 90112 events => throughput is 3.27E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.789149e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.197689e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.838998e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.341628e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2495s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2326s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0169s for 8192 events => throughput is 4.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3821s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3591s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.2355s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0496s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1859s for 90112 events => throughput is 4.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8414s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5939s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2475s for 90112 events => throughput is 3.64E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.954641e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.543777e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.998895e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.667139e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2441s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2295s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4059s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0340s for 8192 events => throughput is 2.41E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.2056s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0461s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1595s for 90112 events => throughput is 5.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0051s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6219s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3832s for 90112 events => throughput is 2.35E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.486226e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.349902e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.651722e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.363260e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7762s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7756s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cuda (0.27110539351263363) differ by less than 3E-14 (1.3322676295501878e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0116s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0036s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 90112 events => throughput is 1.13E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686556561304) differ by less than 3E-14 (4.440892098500626e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.582112e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.121699e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.529721e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.531629e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.538677e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.807286e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.530045e+07 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.783374e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 8d06046d46..4ac5ec3dc1 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=cppnone - -make USEBUILDDIR=1 BACKEND=cpp512y -make USEBUILDDIR=1 BACKEND=cpp512z +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_15:38:36 +DATE: 2024-05-16_13:45:22 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.3140s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2593s - [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4107s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2704s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2157s - [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4161s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6306s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0304s - [COUNTERS] Fortran MEs ( 1 ) : 0.6001s for 90112 events => throughput is 1.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4361s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6155s + [COUNTERS] Fortran MEs ( 1 ) : 0.8206s for 90112 events => throughput is 1.10E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110463093540638] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3248s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2695s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0553s for 8192 events => throughput is 1.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4907s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4145s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0762s for 8192 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686273216112] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6926s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0846s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6080s for 90112 events => throughput is 1.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5059s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6664s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8394s for 90112 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686273216112) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.483088e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.092440e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.519447e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.094603e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110459152958460] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2547s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2348s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0199s for 8192 events => throughput is 4.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3934s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510683016166510] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.2695s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2192s for 90112 events => throughput is 4.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9232s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6228s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3004s for 90112 events => throughput is 3.00E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510683016166510) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.996379e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.039638e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.067723e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.034015e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2344s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2249s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 8192 events => throughput is 8.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3713s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.04E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.1476s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0430s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1046s for 90112 events => throughput is 8.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7502s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6011s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1490s for 90112 events => throughput is 6.05E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.861702e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.923571e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.930775e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.853443e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2343s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2251s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3638s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3514s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 8192 events => throughput is 6.61E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.1442s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0433s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1010s for 90112 events => throughput is 8.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7444s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6060s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1385s for 90112 events => throughput is 6.51E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.196341e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.334346e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.299602e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.476144e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -425,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -433,9 +438,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110464176080312] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2297s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2224s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3762s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3583s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.0178s for 8192 events => throughput is 4.59E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -459,7 +464,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -467,9 +472,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510685411522326] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.1213s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0407s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 90112 events => throughput is 1.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8096s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1967s for 90112 events => throughput is 4.58E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -480,17 +485,121 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510685411522326) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.128056e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.750606e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.146678e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.766894e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110478167944563] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7793s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.48E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cuda (0.27110478167944563) differ by less than 4E-4 (2.2568093527297606e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510689885789414] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0413s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0348s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689885789414) differ by less than 4E-4 (1.547708907700951e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.566939e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.326602e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.593547e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.720103e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.619232e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.806222e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.144615e+07 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.016256e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index dcca5e7b6d..23f8d1233a 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_15:38:47 +DATE: 2024-05-16_01:43:25 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.3141s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2593s - [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s + [COUNTERS] Fortran MEs ( 1 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2729s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2182s - [COUNTERS] Fortran MEs ( 1 ) : 0.0547s for 8192 events => throughput is 1.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4171s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3424s + [COUNTERS] Fortran MEs ( 1 ) : 0.0747s for 8192 events => throughput is 1.10E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6294s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0299s - [COUNTERS] Fortran MEs ( 1 ) : 0.5995s for 90112 events => throughput is 1.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4382s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6232s + [COUNTERS] Fortran MEs ( 1 ) : 0.8150s for 90112 events => throughput is 1.11E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3355s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2758s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0597s for 8192 events => throughput is 1.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5038s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4232s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 8192 events => throughput is 1.02E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560794337] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7435s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0908s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6527s for 90112 events => throughput is 1.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5722s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6823s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8899s for 90112 events => throughput is 1.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794337) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.397909e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.030982e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.408152e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.031715e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2771s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2460s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0311s for 8192 events => throughput is 2.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4234s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3811s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560794334] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.4048s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0631s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3417s for 90112 events => throughput is 2.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1135s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6453s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4682s for 90112 events => throughput is 1.92E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794334) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.616672e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.925046e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.648955e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.926756e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2495s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2324s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3913s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3660s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 8192 events => throughput is 3.24E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.2346s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0474s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1872s for 90112 events => throughput is 4.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8962s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6193s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2769s for 90112 events => throughput is 3.25E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.902593e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.266154e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.938515e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.269502e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2487s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2323s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 8192 events => throughput is 4.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3831s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3610s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.2294s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0492s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1802s for 90112 events => throughput is 5.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8418s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5992s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2426s for 90112 events => throughput is 3.71E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.088595e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.789065e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.144797e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.854777e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -425,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -433,9 +438,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2516s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2362s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4096s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3737s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.0358s for 8192 events => throughput is 2.29E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -458,7 +463,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -466,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.2210s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0523s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1688s for 90112 events => throughput is 5.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0275s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6170s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4105s for 90112 events => throughput is 2.20E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -479,17 +484,121 @@ OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.254596e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.335204e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.419540e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.335548e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7731s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7724s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cuda (0.27110539343558537) differ by less than 2E-4 (2.8419910869104115e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0091s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0010s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.12E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686553631395) differ by less than 2E-4 (1.3620671257541517e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.632055e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.046951e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.534300e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.533151e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.529948e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.832656e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.532773e+07 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.792603e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index a8a31556d0..7b51bb9221 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 - +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_18:54:45 +DATE: 2024-05-16_05:58:47 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.6024s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5656s - [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9450s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8961s + [COUNTERS] Fortran MEs ( 1 ) : 0.0489s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2714s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2346s - [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4283s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3794s + [COUNTERS] Fortran MEs ( 1 ) : 0.0489s for 8192 events => throughput is 1.67E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.2529s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8482s - [COUNTERS] Fortran MEs ( 1 ) : 0.4047s for 90112 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8593s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3225s + [COUNTERS] Fortran MEs ( 1 ) : 0.5368s for 90112 events => throughput is 1.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3007s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2678s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 8192 events => throughput is 2.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.2402s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3619s for 90112 events => throughput is 2.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8316s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4781s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.532595e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.935091e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.573607e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.955688e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2710s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2529s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0181s for 8192 events => throughput is 4.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4208s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3966s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.0656s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8658s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1997s for 90112 events => throughput is 4.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6048s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3350s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2698s for 90112 events => throughput is 3.34E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.469892e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.396840e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.578748e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.435183e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2570s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2464s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 8192 events => throughput is 7.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4008s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3863s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.65E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 0.9758s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8593s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1166s for 90112 events => throughput is 7.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5134s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3430s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1705s for 90112 events => throughput is 5.29E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.020932e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.293625e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.111427e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.214602e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2564s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2461s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0102s for 8192 events => throughput is 8.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3999s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 0.9695s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8578s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1117s for 90112 events => throughput is 8.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4652s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1469s for 90112 events => throughput is 6.13E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.234254e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.964595e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.383472e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.873725e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2545s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 8192 events => throughput is 8.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4159s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3941s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 0.9639s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8570s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1069s for 90112 events => throughput is 8.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5767s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3320s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2446s for 90112 events => throughput is 3.68E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.279955e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.692127e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.296965e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.804832e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8067s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8061s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.36E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955499256165) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1 + [UNWEIGHT] Wrote 1818 events (found 1823 events) + [COUNTERS] PROGRAM TOTAL : 1.7545s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 90112 events => throughput is 1.29E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0434895240377569) and cuda (2.0434895240377573) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.804334e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.230995e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.136468e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.810257e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.115441e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.048562e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.115501e+07 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.748994e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 664b9cfbfe..d09b81d7d3 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_18:54:55 +DATE: 2024-05-16_05:59:14 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.6048s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5680s - [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9433s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8939s + [COUNTERS] Fortran MEs ( 1 ) : 0.0495s for 8192 events => throughput is 1.66E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2729s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2361s - [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4256s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3772s + [COUNTERS] Fortran MEs ( 1 ) : 0.0484s for 8192 events => throughput is 1.69E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.2537s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8502s - [COUNTERS] Fortran MEs ( 1 ) : 0.4035s for 90112 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8568s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3199s + [COUNTERS] Fortran MEs ( 1 ) : 0.5369s for 90112 events => throughput is 1.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162897371946169] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1625 events) - [COUNTERS] PROGRAM TOTAL : 0.2960s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2655s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0305s for 8192 events => throughput is 2.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4525s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4113s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.99E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -138,7 +143,7 @@ OK! xsec from fortran (2.0162955499256161) and cpp (2.0162897371946169) differ b *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! -diff /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 6206,6207c6206,6207 < 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01 0.59936081260E+01 0.00000000000E+00 0. -1. 
< 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304676E+03 0.48080583916E+03 0.47000000000E+01 0. 1. diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index b85cc61a79..291c38991b 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx + + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_18:54:58 +DATE: 2024-05-16_05:59:20 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.6036s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5668s - [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9570s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9073s + [COUNTERS] Fortran MEs ( 1 ) : 0.0498s for 8192 events => throughput is 1.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 
8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2727s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2359s - [COUNTERS] Fortran MEs ( 1 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4201s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3715s + [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.2509s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8468s - [COUNTERS] Fortran MEs ( 1 ) : 0.4041s for 90112 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8553s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3195s + [COUNTERS] Fortran MEs ( 1 ) : 0.5358s for 90112 events => throughput is 1.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -121,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -129,9 +134,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3085s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2757s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0328s for 8192 events => throughput is 2.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4604s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4161s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.0443s for 8192 events => throughput is 1.85E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -155,7 +160,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -163,9 +168,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.2408s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8801s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3608s for 90112 events => throughput is 2.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8327s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3534s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4793s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -177,15 +182,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.385536e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.817766e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.429119e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.799752e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -201,7 +206,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,9 +214,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2715s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2535s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0180s for 8192 events => throughput is 4.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4227s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3979s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.30E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,7 +240,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -243,9 +248,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.0627s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8648s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1979s for 90112 events => throughput is 4.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6020s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3321s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2699s for 90112 events => throughput is 3.34E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,15 +262,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.263258e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.208317e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.354057e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.242147e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,7 +286,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -289,9 +294,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2558s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2450s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4018s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.35E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -315,7 +320,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -323,9 +328,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 0.9781s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8586s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1195s for 90112 events => throughput is 7.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3319s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1679s for 90112 events => throughput is 5.37E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -337,15 +342,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.855167e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.916130e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.880927e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.911254e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,7 +366,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -369,9 +374,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2601s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2496s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0105s for 8192 events => throughput is 7.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4012s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3876s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.01E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,7 +400,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -403,9 +408,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 0.9714s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8562s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1152s for 90112 events => throughput is 7.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4747s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3226s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1520s for 90112 events => throughput is 5.93E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -417,15 +422,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.059394e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.307270e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.102012e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.162265e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -441,7 +446,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -449,9 +454,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.2564s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2461s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0102s for 8192 events => throughput is 8.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4179s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3948s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.55E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -475,7 +480,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -483,9 +488,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 0.9690s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8574s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1117s for 90112 events => throughput is 8.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5949s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3393s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2556s for 90112 events => throughput is 3.53E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -497,18 +502,122 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.366098e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.332654e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.481957e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.349365e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8124s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8118s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.37E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955503257827) differ by less than 2E-4 (1.9846613241725208e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1 + [UNWEIGHT] Wrote 1818 events (found 1823 events) + [COUNTERS] PROGRAM TOTAL : 1.7514s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7444s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 90112 events => throughput is 1.29E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0434895240377569) and cuda (2.0434895242795732) differ by less than 2E-4 (1.183348974365117e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.815756e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.247313e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.109600e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.657132e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.111219e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.039865e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.111200e+07 ) sec^-1 -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.771983e+07 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 8dfe862e85..80269e77b1 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx + +make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y - +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_18:55:22 +DATE: 2024-05-16_06:00:18 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 1.9759s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2171s - [COUNTERS] Fortran MEs ( 1 ) : 1.7588s for 8192 events => throughput is 4.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6807s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3392s + [COUNTERS] Fortran MEs ( 1 ) : 2.3415s for 8192 events => throughput is 3.50E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.9741s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2156s - [COUNTERS] Fortran MEs ( 1 ) : 1.7585s for 8192 events => throughput is 4.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6731s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3387s + [COUNTERS] Fortran MEs ( 1 ) : 2.3344s for 8192 events => throughput is 3.51E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 20.5139s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1701s - [COUNTERS] Fortran MEs ( 1 ) : 19.3438s for 90112 events => throughput is 4.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.7347s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8315s + [COUNTERS] Fortran MEs ( 1 ) : 25.9031s for 90112 events => throughput is 3.48E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 3.9218s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0315s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8903s for 8192 events => throughput is 4.33E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.3198s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7672s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5526s for 8192 events => throughput is 3.21E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 23.7660s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9782s - [COUNTERS] CudaCpp MEs ( 2 ) : 20.7878s for 90112 events => throughput is 4.33E+03 events/s + [COUNTERS] PROGRAM TOTAL : 31.9550s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2173s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7377s for 90112 events => throughput is 3.25E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438187E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.538676e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.410579e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.540930e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.412797e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.0838s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1345s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9493s for 8192 events => throughput is 8.63E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.9225s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6149s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3076s for 8192 events => throughput is 6.27E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 12.5399s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0884s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.4514s for 90112 events => throughput is 8.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.4719s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0716s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.4004s for 90112 events => throughput is 6.26E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438230E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.988476e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.486190e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.934332e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.480385e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.0362s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6247s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4115s for 8192 events => throughput is 1.99E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4971s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9161s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5810s for 8192 events => throughput is 1.41E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 6.1052s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5780s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.5273s for 90112 events => throughput is 1.99E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.7948s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3698s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.4250s for 90112 events => throughput is 1.40E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.019469e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.446474e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.015794e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.453779e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9608s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5859s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3750s for 8192 events => throughput is 2.18E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3443s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8339s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5104s for 8192 events => throughput is 1.60E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.6614s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5379s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1235s for 90112 events => throughput is 2.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.0840s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3189s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7651s for 90112 events => throughput is 1.56E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.238664e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.657111e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.241558e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.653778e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.6974s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4560s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2414s for 8192 events => throughput is 3.39E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.7114s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0187s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6926s for 8192 events => throughput is 1.18E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 4.0653s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4076s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.6577s for 90112 events => throughput is 3.39E+04 events/s + [COUNTERS] PROGRAM TOTAL : 10.1275s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5284s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.5992s for 90112 events => throughput is 1.19E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.456123e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.225114e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.469628e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.220944e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 0.8326s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8156s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.80E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 2.4753s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1885s for 90112 events => throughput is 4.78E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.843300e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.218089e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.155842e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.421655e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.190444e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.415946e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.148629e+05 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.764175e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index f1c86598c9..45b154f6da 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_18:56:56 +DATE: 2024-05-16_06:02:57 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 1.9756s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2172s - [COUNTERS] Fortran MEs ( 1 ) : 1.7584s for 8192 events => throughput is 4.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6694s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s + [COUNTERS] Fortran MEs ( 1 ) : 2.3294s for 8192 events => throughput is 3.52E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.9741s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2158s - [COUNTERS] Fortran MEs ( 1 ) : 1.7583s for 8192 events => throughput is 4.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3390s + [COUNTERS] Fortran MEs ( 1 ) : 2.3358s for 8192 events => throughput is 3.51E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 20.5198s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1703s - [COUNTERS] Fortran MEs ( 1 ) : 19.3495s for 90112 events => throughput is 4.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.4771s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8059s + [COUNTERS] Fortran MEs ( 1 ) : 25.6712s for 90112 events => throughput is 3.51E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896785213255034E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 3.8367s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9909s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8458s for 8192 events => throughput is 4.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0871s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6795s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4076s for 8192 events => throughput is 3.40E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668138359550833E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 23.2382s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9415s - [COUNTERS] CudaCpp MEs ( 2 ) : 20.2967s for 90112 events => throughput is 4.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.6384s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1272s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.5112s for 90112 events => throughput is 3.40E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668138359550833E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.625362e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.518646e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.630971e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.519140e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.1744s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6909s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4835s for 8192 events => throughput is 1.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.6928s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0131s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6797s for 8192 events => throughput is 1.21E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 6.9719s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6434s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.3285s for 90112 events => throughput is 1.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.9205s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4645s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.4560s for 90112 events => throughput is 1.21E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668121906848987E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.743379e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.232964e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.737322e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.231409e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.6365s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4244s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2122s for 8192 events => throughput is 3.86E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9294s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6306s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2988s for 8192 events => throughput is 2.74E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 3.7112s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3768s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3344s for 90112 events => throughput is 3.86E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3691s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0900s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.2791s for 90112 events => throughput is 2.75E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.009425e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.797581e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.005235e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.799365e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.5979s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4052s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1928s for 8192 events => throughput is 4.25E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5958s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2624s for 8192 events => throughput is 3.12E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 3.4892s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3700s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.1192s for 90112 events => throughput is 4.25E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9443s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0456s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8987s for 90112 events => throughput is 3.11E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.385408e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.232867e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.384671e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.231460e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.4608s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3375s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1233s for 8192 events => throughput is 6.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0285s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3457s for 8192 events => throughput is 2.37E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.6549s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2980s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3569s for 90112 events => throughput is 6.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.9990s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1683s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.8307s for 90112 events => throughput is 2.35E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668139178203571E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.847540e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.399591e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.892703e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.392653e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896805369365078E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 0.8276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8136s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.86E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896805369365078E-007) differ by less than 4E-4 (1.3444145174901223e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668194616292154E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 2.4309s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2769s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1540s for 90112 events => throughput is 5.85E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668194616292154E-007) differ by less than 4E-4 (1.4486452351025747e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.229528e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.512458e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.376887e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.385341e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.353397e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.396782e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.372639e+06 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.860961e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 53302fc578..66daeb0e97 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda + +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_18:58:14 +DATE: 2024-05-16_06:05:05 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 1.9788s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2188s - [COUNTERS] Fortran MEs ( 1 ) : 1.7600s for 8192 events => throughput is 4.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7059s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3465s + [COUNTERS] Fortran MEs ( 1 ) : 2.3593s for 8192 events => throughput is 3.47E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.9800s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2176s - [COUNTERS] Fortran MEs ( 1 ) : 1.7624s for 8192 events => throughput is 4.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7035s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s + [COUNTERS] Fortran MEs ( 1 ) : 2.3597s for 8192 events => throughput is 3.47E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 20.5105s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1674s - [COUNTERS] Fortran MEs ( 1 ) : 19.3430s for 90112 events => throughput is 4.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.5770s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8193s + [COUNTERS] Fortran MEs ( 1 ) : 25.7577s for 90112 events => throughput is 3.50E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 3.9498s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0435s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9063s for 8192 events => throughput is 4.30E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.3005s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7718s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5286s for 8192 events => throughput is 3.24E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -153,7 +158,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -161,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 23.9601s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0039s - [COUNTERS] CudaCpp MEs ( 2 ) : 20.9562s for 90112 events => throughput is 4.30E+03 events/s + [COUNTERS] PROGRAM TOTAL : 31.9857s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2191s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7666s for 90112 events => throughput is 3.25E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -174,14 +179,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081976882373E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.496946e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.386546e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.495706e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.397002e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -196,7 +201,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -204,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.0599s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1247s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9352s for 8192 events => throughput is 8.76E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.8744s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5934s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2810s for 8192 events => throughput is 6.40E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -229,7 +234,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -237,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 12.3858s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0854s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.3004s for 90112 events => throughput is 8.75E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.1795s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0414s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.1381s for 90112 events => throughput is 6.37E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -250,14 +255,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081890954375E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.046465e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.678262e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.046622e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.743588e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -272,7 +277,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -280,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.0405s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6241s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4164s for 8192 events => throughput is 1.97E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9015s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5764s for 8192 events => throughput is 1.42E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -305,7 +310,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -313,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 6.1542s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5762s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.5780s for 90112 events => throughput is 1.97E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.7035s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3540s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.3495s for 90112 events => throughput is 1.42E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -326,14 +331,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.029073e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.454724e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.028411e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.476512e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -348,7 +353,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -356,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9537s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5816s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3721s for 8192 events => throughput is 2.20E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3378s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8314s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5064s for 8192 events => throughput is 1.62E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -381,7 +386,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -389,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.6291s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5337s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.0955s for 90112 events => throughput is 2.20E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.8877s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3042s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5835s for 90112 events => throughput is 1.61E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -402,14 +407,14 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.190729e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.679497e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.259030e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.670221e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -424,7 +429,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -432,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.7063s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4622s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2441s for 8192 events => throughput is 3.36E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.7201s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0244s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6957s for 8192 events => throughput is 1.18E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -457,7 +462,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -465,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 4.1083s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4220s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.6863s for 90112 events => throughput is 3.35E+04 events/s + [COUNTERS] PROGRAM TOTAL : 10.1095s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4855s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.6240s for 90112 events => throughput is 1.18E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -478,17 +483,121 @@ OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.407267e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.204025e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.427980e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.203510e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 0.8365s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8192s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.75E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697918297644E-007) differ by less than 2E-4 (4.6042958334879813e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 2.4662s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2768s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1894s for 90112 events => throughput is 4.76E+05 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551547592E-007) differ by less than 2E-4 (1.4264145420384011e-12) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.814879e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.185918e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.154361e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.382253e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.160102e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.387193e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.108981e+05 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (there is no GPU on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.750323e+05 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (there is no GPU on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 4c0f9c6dec..059122dda6 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_18:55:16 +DATE: 2024-05-16_06:00:05 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.2660s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2596s - [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4148s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4054s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.2077s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2013s - [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3154s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3059s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 0.9314s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8618s - [COUNTERS] Fortran MEs ( 1 ) : 0.0696s for 90112 events => throughput is 1.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3773s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2771s + [COUNTERS] Fortran MEs ( 1 ) : 0.1002s for 90112 events => throughput is 8.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 3960b3117f..01167da954 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_18:55:18 +DATE: 2024-05-16_06:00:09 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.2644s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2580s - [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4095s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4004s + [COUNTERS] Fortran MEs ( 1 ) : 0.0092s for 8192 events => throughput is 8.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.2044s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1979s - [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3199s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3104s + [COUNTERS] Fortran MEs ( 1 ) : 0.0095s for 8192 events => throughput is 8.61E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 0.9297s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8602s - [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 90112 events => throughput is 1.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3992s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2973s + [COUNTERS] Fortran MEs ( 1 ) : 0.1018s for 90112 events => throughput is 8.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 3c9483b9b3..6c876298cd 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 - +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering 
directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_18:55:20 +DATE: 2024-05-16_06:00:14 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.2659s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2595s - [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4113s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4020s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ 
CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.2046s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1982s - [COUNTERS] Fortran MEs ( 1 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3138s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3043s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.67E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 0.9294s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8596s - [COUNTERS] Fortran MEs ( 1 ) : 0.0699s for 90112 events => throughput is 1.29E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3758s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2758s + [COUNTERS] Fortran MEs ( 1 ) : 0.1001s for 90112 events => throughput is 9.01E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 8a45c6ae77..fd24a61552 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_18:55:08 +DATE: 2024-05-16_05:59:47 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.5278s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4954s - [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8237s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7798s + [COUNTERS] Fortran MEs ( 1 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 
8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2715s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2389s - [COUNTERS] Fortran MEs ( 1 ) : 0.0326s for 8192 events => throughput is 2.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4191s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s + [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.2100s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8554s - [COUNTERS] Fortran MEs ( 1 ) : 0.3547s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8130s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3278s + [COUNTERS] Fortran MEs ( 1 ) : 0.4852s for 90112 events => throughput is 1.86E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 171.8 [171.81273026311101] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.4518s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4223s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0294s for 8192 events => throughput is 2.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7007s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0395s for 8192 events => throughput is 2.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index a5ee8c5b18..293718b73f 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_18:55:11 +DATE: 2024-05-16_05:59:53 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.5283s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4959s - [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8342s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7896s + [COUNTERS] Fortran MEs ( 1 ) : 0.0446s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2717s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2393s - [COUNTERS] Fortran MEs ( 1 ) : 0.0323s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4178s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3733s + [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.2106s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8554s - [COUNTERS] Fortran MEs ( 1 ) : 0.3552s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8125s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3299s + [COUNTERS] Fortran MEs ( 1 ) : 0.4826s for 90112 events => throughput is 1.87E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 171.8 [171.81270286137041] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.4544s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4274s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7025s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6657s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0367s for 8192 events => throughput is 2.23E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 4c55397aef..f9ac9cdc3d 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,36 +1,41 @@ -Working directory (build): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_18:55:14 +DATE: 2024-05-16_05:59:59 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: -Working directory (run): /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -45,7 +50,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -53,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.5274s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4950s - [COUNTERS] Fortran MEs ( 1 ) : 0.0323s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8401s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7957s + [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -70,7 +75,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -78,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.2696s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2373s - [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4268s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3820s + [COUNTERS] Fortran MEs ( 1 ) : 0.0448s for 8192 events => throughput is 1.83E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -95,7 +100,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -103,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.2119s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8573s - [COUNTERS] Fortran MEs ( 1 ) : 0.3546s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8476s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3594s + [COUNTERS] Fortran MEs ( 1 ) : 0.4881s for 90112 events => throughput is 1.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -120,7 +125,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -128,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 171.8 [171.81273490068889] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.4538s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4239s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0299s for 8192 events => throughput is 2.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7032s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6624s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index d18921ee68..8f9275e4c9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:54:13 +DATE: 2024-05-16_14:32:11 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.832141e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.963737e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.194654e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.663591 sec +INFO: No Floating Point Exceptions have been reported + 2,552,888,166 cycles # 2.852 GHz + 3,988,214,096 instructions # 1.56 insn per cycle + 0.955666761 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 
-EvtsPerSec[Rmb+ME] (23) = ( 1.427849e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.646988e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.646988e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.026503 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.053499e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.235255e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235255e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.389071 sec INFO: No Floating Point Exceptions have been reported - 17,588,520,722 cycles # 3.497 GHz - 44,027,100,031 instructions # 2.50 insn per cycle - 5.029781350 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 435) (avx2: 0) (512y: 0) (512z: 0) + 18,286,976,651 cycles # 2.860 GHz + 43,966,894,713 instructions # 2.40 insn per cycle + 6.394304827 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.245017e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.898388e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.898388e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.409137 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.575090e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.062276e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.062276e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.417510 sec INFO: No Floating Point Exceptions have been reported - 11,925,966,926 cycles # 3.496 GHz - 30,984,049,975 instructions # 2.60 insn per cycle - 3.412125095 seconds time elapsed + 12,744,894,916 cycles # 2.883 GHz + 31,001,019,523 instructions # 2.43 insn per cycle + 4.422588286 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.722325e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.688825e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.688825e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.921646 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.946862e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.717949e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.717949e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.650052 sec INFO: No Floating Point Exceptions have been reported - 9,887,221,571 cycles # 3.382 GHz - 19,250,563,361 instructions # 1.95 insn per cycle - 2.924502262 seconds time elapsed + 10,049,299,034 cycles # 2.750 GHz + 19,366,983,583 instructions # 1.93 insn per cycle + 3.655131055 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.802769e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.833281e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.833281e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.834217 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.022084e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.853820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.853820e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.526182 sec INFO: No Floating Point Exceptions have been reported - 9,591,849,238 cycles # 3.382 GHz - 18,670,168,361 instructions # 1.95 insn per cycle - 2.836767218 seconds time elapsed + 9,727,313,452 cycles # 2.755 GHz + 18,976,774,064 instructions # 1.95 insn per cycle + 3.531366474 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.517779e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.263169e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.263169e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.379153 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.695166e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.233065e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.233065e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.130967 sec INFO: No Floating Point Exceptions have been reported - 8,086,400,503 cycles # 3.395 GHz - 15,407,515,769 instructions # 1.91 insn per cycle - 2.381976161 seconds time elapsed + 8,580,931,991 cycles # 2.075 GHz + 15,727,945,386 instructions # 1.83 insn per cycle + 4.136130895 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 02c21aaf1a..556a164c58 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,188 +1,227 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:12:05 +DATE: 2024-05-16_14:59:51 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.482485e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.592798e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.592798e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.321030 sec +INFO: No Floating Point Exceptions have been reported + 7,329,722,152 cycles # 2.840 GHz + 13,178,162,400 instructions # 1.80 insn per cycle + 2.637544426 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.385573e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.591799e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.591799e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.240714 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.015910e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.185158e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185158e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.819356 sec INFO: No Floating Point Exceptions have been reported - 18,348,617,166 cycles # 3.499 GHz - 44,271,880,648 instructions # 2.41 insn per cycle - 5.244548893 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 435) (avx2: 0) (512y: 0) (512z: 0) + 19,469,024,508 cycles # 2.853 GHz + 44,194,459,972 instructions # 2.27 insn per cycle + 6.826135735 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.133902e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.717359e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.717359e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.714837 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.484528e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.911785e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.911785e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.870031 sec INFO: No Floating Point Exceptions have been reported - 13,006,962,004 cycles # 3.498 GHz - 31,804,543,831 instructions # 2.45 insn per cycle - 3.718659657 seconds time elapsed + 13,928,289,994 cycles # 2.857 GHz + 31,840,505,402 instructions # 2.29 insn per cycle + 4.876819018 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.547917e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.391599e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.391599e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.241645 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.770728e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.386706e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.386706e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.194561 sec INFO: No Floating Point Exceptions have been reported - 10,997,993,916 cycles # 3.390 GHz - 20,589,341,691 instructions # 1.87 insn per cycle - 3.245518188 seconds time elapsed + 11,361,909,372 cycles # 2.705 GHz + 20,728,193,515 instructions # 1.82 insn per cycle + 4.201564491 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.629666e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.521126e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.521126e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.136875 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.837895e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.511052e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.511052e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.060412 sec INFO: No Floating Point Exceptions have been reported - 10,652,887,194 cycles # 3.392 GHz - 20,009,865,369 instructions # 1.88 insn per cycle - 3.140687025 seconds time elapsed + 10,967,372,142 cycles # 2.697 GHz + 20,348,024,135 instructions # 1.86 insn per cycle + 4.067336299 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.236908e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.670822e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.670822e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.697812 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.574771e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.036722e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.036722e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.642745 sec INFO: No Floating Point Exceptions have been reported - 9,210,173,022 cycles # 3.409 GHz - 16,530,589,084 instructions # 1.79 insn per cycle - 2.701650823 seconds time elapsed + 9,876,735,144 cycles # 2.125 GHz + 16,873,564,045 instructions # 1.71 insn per cycle + 4.649693422 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 04d239f813..752636bf13 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:16:16 +DATE: 2024-05-16_15:10:56 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.514734e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.592124e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.118434e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.385277 sec +INFO: No Floating Point Exceptions have been reported + 4,585,820,337 cycles # 2.836 GHz + 7,177,605,134 instructions # 1.57 insn per cycle + 1.675534023 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.412133e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.628068e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.628068e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.053610e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.236408e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236408e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.073081 sec +TOTAL : 6.764205 sec INFO: No Floating Point Exceptions have been reported - 17,747,378,428 cycles # 3.497 GHz - 44,027,757,986 instructions # 2.48 insn per cycle - 5.075907933 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 435) (avx2: 0) (512y: 0) (512z: 0) + 19,353,708,257 cycles # 2.859 GHz + 44,070,957,602 instructions # 2.28 insn per cycle + 6.769682162 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.249979e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.900771e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.900771e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.538542e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.015186e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.015186e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.402943 sec +TOTAL : 4.882143 sec INFO: No Floating Point Exceptions have been reported - 11,912,110,688 cycles # 3.498 GHz - 30,984,203,166 instructions # 2.60 insn per cycle - 3.405716771 seconds time elapsed + 13,858,733,837 cycles # 2.836 GHz + 31,001,638,282 instructions # 2.24 insn per cycle + 4.887574523 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.718087e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.685398e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.685398e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.916460e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.668694e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.668694e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.927995 sec +TOTAL : 4.073472 sec INFO: No Floating Point Exceptions have been reported - 9,910,289,470 cycles # 3.382 GHz - 19,251,026,310 instructions # 1.94 insn per cycle - 2.930797627 seconds time elapsed + 11,156,313,503 cycles # 2.736 GHz + 19,267,334,271 instructions # 1.73 insn per cycle + 4.078862770 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.791935e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.810280e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.810280e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.001009e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.815270e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.815270e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.840901 sec +TOTAL : 3.939832 sec INFO: No Floating Point Exceptions have been reported - 9,615,157,995 cycles # 3.382 GHz - 18,672,256,452 instructions # 1.94 insn per cycle - 2.843647429 seconds time elapsed + 10,815,664,939 cycles # 2.742 GHz + 18,691,798,772 instructions # 1.73 insn per cycle + 3.945208768 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.523847e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.275101e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.275101e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.659550e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.188571e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.188571e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.374393 sec +TOTAL : 4.600299 sec INFO: No Floating Point Exceptions have been reported - 8,074,601,724 cycles # 3.397 GHz - 15,407,700,600 instructions # 1.91 insn per cycle - 2.377184825 seconds time elapsed + 9,711,022,403 cycles # 2.112 GHz + 15,432,876,214 instructions # 1.59 insn per cycle + 4.605929662 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 5f9b7e499d..55f8e65d60 100644 --- 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -1,128 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:16:04 +DATE: 2024-05-16_15:08:14 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.528786e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.598045e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.123971e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.009724 sec +INFO: No 
Floating Point Exceptions have been reported + 3,524,520,771 cycles # 2.832 GHz + 6,986,852,382 instructions # 1.98 insn per cycle + 1.303213412 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe: Aborted - 4,227,660 
cycles # 3.245 GHz - 6,316,211 instructions # 1.49 insn per cycle - 0.041237880 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 435) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.055666e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.238246e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.238246e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.381413 sec +INFO: No Floating Point Exceptions have been reported + 18,267,573,257 cycles # 2.861 GHz + 43,966,026,516 instructions # 2.41 insn per cycle + 6.386884750 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe: Aborted - 4,179,583 cycles # 3.239 GHz - 6,352,819 instructions # 1.52 insn per cycle - 0.038963988 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.556382e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = 
( 2.037173e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.037173e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.468246 sec +INFO: No Floating Point Exceptions have been reported + 12,780,247,489 cycles # 2.858 GHz + 30,998,946,765 instructions # 2.43 insn per cycle + 4.473714210 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe: Aborted - 
4,172,850 cycles # 2.620 GHz - 6,332,258 instructions # 1.52 insn per cycle - 0.039710095 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.920501e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.673029e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.673029e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.698478 sec +INFO: No Floating Point Exceptions have been reported + 10,077,708,073 cycles # 2.723 GHz + 19,366,955,499 instructions # 1.92 insn per cycle + 3.704491612 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe: Aborted - 3,973,250 cycles # 2.596 GHz - 6,339,390 instructions # 1.60 insn per cycle - 0.038745082 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.006263e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = 
( 2.826924e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.826924e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.554344 sec +INFO: No Floating Point Exceptions have been reported + 9,708,097,650 cycles # 2.728 GHz + 18,987,540,468 instructions # 1.96 insn per cycle + 3.559725957 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe: Aborted - 
3,997,344 cycles # 2.695 GHz - 6,335,669 instructions # 1.58 insn per cycle - 0.038904403 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.673906e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.201263e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.201263e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.180852 sec +INFO: No Floating Point Exceptions have been reported + 8,572,399,884 cycles # 2.048 GHz + 15,727,509,673 instructions # 1.83 insn per cycle + 4.186463799 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 3524907bed..8320028620 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,183 +1,216 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:14:40 +DATE: 2024-05-16_15:05:29 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.845001e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.545626e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.012755e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.945484 sec +INFO: No Floating Point Exceptions have been reported + 6,182,952,030 cycles # 2.840 GHz + 11,472,065,600 instructions # 1.86 insn per cycle + 2.234065267 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.410654e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.627660e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.627660e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.074615 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.053191e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.234940e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234940e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.394281 sec INFO: No Floating Point Exceptions have been reported - 17,749,993,456 cycles # 3.496 GHz - 44,027,344,775 instructions # 2.48 insn per cycle - 5.077371844 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 435) (avx2: 0) (512y: 0) (512z: 0) + 18,294,474,856 cycles # 2.859 GHz + 43,971,000,114 instructions # 2.40 insn per cycle + 6.399562206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.248368e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.898949e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.898949e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.408286 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.557791e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.040989e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.040989e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.463787 sec INFO: No Floating Point Exceptions have been reported - 11,929,874,073 cycles # 3.498 GHz - 30,983,626,329 instructions # 2.60 insn per cycle - 3.411178393 seconds time elapsed + 12,772,778,524 cycles # 2.859 GHz + 30,998,712,334 instructions # 2.43 insn per cycle + 4.469196075 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.717188e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.684291e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.684291e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.928745 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.925684e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.680642e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.680642e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.688587 sec INFO: No Floating Point Exceptions have been reported - 9,914,485,778 cycles # 3.382 GHz - 19,250,668,720 instructions # 1.94 insn per cycle - 2.931625971 seconds time elapsed + 10,072,112,495 cycles # 2.727 GHz + 19,365,616,714 instructions # 1.92 insn per cycle + 3.694022814 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.799056e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.830181e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.830181e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.843755 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.999182e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.825411e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.825411e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.565445 sec INFO: No Floating Point Exceptions have been reported - 9,625,840,270 cycles # 3.382 GHz - 18,671,694,962 instructions # 1.94 insn per cycle - 2.846615920 seconds time elapsed + 9,737,532,909 cycles # 2.728 GHz + 18,976,607,709 instructions # 1.95 insn per cycle + 3.570830090 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.525745e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.268323e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.268323e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.373981 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.671566e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.197159e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.197159e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.185653 sec INFO: No Floating Point Exceptions have been reported - 8,072,986,024 cycles # 3.397 GHz - 15,407,698,309 instructions # 1.91 insn per cycle - 2.376793413 seconds time elapsed + 8,570,262,444 cycles # 2.045 GHz + 15,727,819,138 instructions # 1.84 insn per cycle + 4.191109662 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index c549d3b329..6aa3de3ecf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:54:31 +DATE: 2024-05-16_14:32:42 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.832072e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.951586e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.177922e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.666506 sec +INFO: No Floating Point Exceptions have been reported + 2,570,661,306 cycles # 2.841 GHz + 3,994,547,928 instructions # 1.55 insn per cycle + 0.967516454 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 
-EvtsPerSec[Rmb+ME] (23) = ( 1.528086e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.781237e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.781237e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.730345 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.115974e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322257e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322257e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.054676 sec INFO: No Floating Point Exceptions have been reported - 16,557,247,391 cycles # 3.498 GHz - 41,872,984,696 instructions # 2.53 insn per cycle - 4.733179835 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 394) (avx2: 0) (512y: 0) (512z: 0) + 17,515,565,744 cycles # 2.891 GHz + 41,813,477,100 instructions # 2.39 insn per cycle + 6.059803806 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.287098e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.964780e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.964780e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.357673 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.620797e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.138518e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.138518e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.301844 sec INFO: No Floating Point Exceptions have been reported - 11,740,376,151 cycles # 3.495 GHz - 30,144,486,447 instructions # 2.57 insn per cycle - 3.360151754 seconds time elapsed + 12,450,766,554 cycles # 2.891 GHz + 30,161,114,565 instructions # 2.42 insn per cycle + 4.307292943 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1612) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.767086e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.781022e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.781022e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.887003 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.953305e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.731201e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.731201e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.641578 sec INFO: No Floating Point Exceptions have been reported - 9,774,543,201 cycles # 3.383 GHz - 18,982,650,002 instructions # 1.94 insn per cycle - 2.889874818 seconds time elapsed + 9,958,194,708 cycles # 2.732 GHz + 19,097,340,022 instructions # 1.92 insn per cycle + 3.646748223 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1931) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.831390e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.882164e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.882164e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.810899 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.030144e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.871070e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.871070e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.516336 sec INFO: No Floating Point Exceptions have been reported - 9,514,166,235 cycles # 3.382 GHz - 18,441,143,108 instructions # 1.94 insn per cycle - 2.813727674 seconds time elapsed + 9,615,329,857 cycles # 2.731 GHz + 18,757,197,169 instructions # 1.95 insn per cycle + 3.521486960 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1662) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.536310e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.303726e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.303726e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.369421 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.716452e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.276447e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.276447e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.085042 sec INFO: No Floating Point Exceptions have been reported - 8,055,861,241 cycles # 3.397 GHz - 15,282,458,066 instructions # 1.90 insn per cycle - 2.372003109 seconds time elapsed + 8,419,965,935 cycles # 2.059 GHz + 15,604,092,420 instructions # 1.85 insn per cycle + 4.090208290 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 887) (512y: 156) (512z: 1239) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 881ad88f88..b8b45776b1 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:06:32 +DATE: 2024-05-16_14:50:17 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.702175e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.710921e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.156854e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.694390 sec +INFO: No Floating Point Exceptions have been reported + 2,605,646,002 cycles # 2.805 GHz + 4,054,878,448 instructions # 1.56 insn per cycle + 0.990293563 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 
-EvtsPerSec[Rmb+ME] (23) = ( 2.175343e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.727679e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.727679e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.506144 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.570616e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.013848e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.013848e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.427060 sec INFO: No Floating Point Exceptions have been reported - 12,274,429,099 cycles # 3.498 GHz - 32,594,760,258 instructions # 2.66 insn per cycle - 3.509011976 seconds time elapsed + 12,653,335,495 cycles # 2.855 GHz + 32,508,582,789 instructions # 2.57 insn per cycle + 4.432506316 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.906763e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.092678e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.092678e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.770319 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.997942e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.867798e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.867798e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.572733 sec INFO: No Floating Point Exceptions have been reported - 9,696,571,030 cycles # 3.497 GHz - 24,460,006,230 instructions # 2.52 insn per cycle - 2.773117767 seconds time elapsed + 10,224,022,422 cycles # 2.858 GHz + 24,474,305,392 instructions # 2.39 insn per cycle + 3.578147466 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.991915e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.206251e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.206251e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.713535 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.166030e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.174429e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.174429e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.325864 sec INFO: No Floating Point Exceptions have been reported - 9,202,322,106 cycles # 3.388 GHz - 16,771,044,980 instructions # 1.82 insn per cycle - 2.716375138 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1623) (512y: 0) (512z: 0) + 9,098,194,590 cycles # 2.732 GHz + 16,922,780,551 instructions # 1.86 insn per cycle + 3.331363940 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.294367e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.822201e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.822201e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.504424 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.212975e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.265291e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.265291e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.263243 sec INFO: No Floating Point Exceptions have been reported - 8,765,996,140 cycles # 3.497 GHz - 16,017,103,137 instructions # 1.83 insn per cycle - 2.507273198 seconds time elapsed + 8,899,581,855 cycles # 2.723 GHz + 16,332,700,862 instructions # 1.84 insn per cycle + 3.268811314 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.713472e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.714893e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.714893e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.285340 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.881513e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.573087e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.573087e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.766130 sec INFO: No Floating Point Exceptions have been reported - 7,781,310,210 cycles # 3.401 GHz - 14,263,612,829 instructions # 1.83 insn per cycle - 2.288259219 seconds time elapsed + 7,870,004,063 cycles # 2.087 GHz + 14,582,523,760 instructions # 1.85 insn per cycle + 3.771527980 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 15458a5536..36ca3a055a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:06:47 +DATE: 2024-05-16_14:50:44 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.703728e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.728666e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.213805e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.683308 sec +INFO: No Floating Point Exceptions have been reported + 2,600,218,993 cycles # 2.827 GHz + 4,020,842,023 instructions # 1.55 insn per cycle + 0.979103636 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 
-EvtsPerSec[Rmb+ME] (23) = ( 2.961070e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.108053e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.108053e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.740479 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.089739e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.950650e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.950650e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.429960 sec INFO: No Floating Point Exceptions have been reported - 9,588,624,171 cycles # 3.496 GHz - 25,475,292,972 instructions # 2.66 insn per cycle - 2.743367209 seconds time elapsed + 9,811,818,087 cycles # 2.857 GHz + 25,388,363,151 instructions # 2.59 insn per cycle + 3.435408237 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.539656e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.478152e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.478152e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.390239 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.308707e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.576242e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.576242e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.150608 sec INFO: No Floating Point Exceptions have been reported - 8,371,663,188 cycles # 3.499 GHz - 21,469,158,286 instructions # 2.56 insn per cycle - 2.393122141 seconds time elapsed + 9,017,040,298 cycles # 2.858 GHz + 21,483,572,468 instructions # 2.38 insn per cycle + 3.156151233 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.412167e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.091616e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.091616e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.454676 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.317514e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.502934e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.502934e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.137476 sec INFO: No Floating Point Exceptions have been reported - 8,591,996,487 cycles # 3.496 GHz - 15,696,649,387 instructions # 1.83 insn per cycle - 2.458017389 seconds time elapsed + 8,591,075,632 cycles # 2.734 GHz + 15,811,134,800 instructions # 1.84 insn per cycle + 3.143116597 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.589110e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.477157e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.477157e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.343031 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.364262e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.604788e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.604788e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.081738 sec INFO: No Floating Point Exceptions have been reported - 8,192,410,575 cycles # 3.493 GHz - 15,188,290,742 instructions # 1.85 insn per cycle - 2.345913373 seconds time elapsed + 8,442,051,612 cycles # 2.735 GHz + 15,504,513,991 instructions # 1.84 insn per cycle + 3.087247738 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.866424e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.079210e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.079210e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.219053 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.995819e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.803385e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.803385e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.575599 sec INFO: No Floating Point Exceptions have been reported - 7,562,811,698 cycles # 3.405 GHz - 13,965,657,611 instructions # 1.85 insn per cycle - 2.221942369 seconds time elapsed + 7,560,717,738 cycles # 2.112 GHz + 14,283,918,013 instructions # 1.89 insn per cycle + 3.581217674 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index dbd01f373a..e6a48e18ea 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:54:48 +DATE: 2024-05-16_14:33:12 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.602023e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.319974e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.288580e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.573413 sec +INFO: No Floating Point Exceptions have been reported + 2,248,961,780 cycles # 2.823 GHz + 3,510,545,687 instructions # 1.56 insn per cycle + 0.854969378 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 
-EvtsPerSec[Rmb+ME] (23) = ( 1.590876e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.910010e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910010e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.552360 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.082006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.283429e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.283429e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.191643 sec INFO: No Floating Point Exceptions have been reported - 15,929,481,571 cycles # 3.497 GHz - 43,645,354,190 instructions # 2.74 insn per cycle - 4.555018401 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 17,740,084,916 cycles # 2.864 GHz + 43,510,870,904 instructions # 2.45 insn per cycle + 6.196630799 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.137313e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.789699e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.789699e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 
3.270266e-06 ) GeV^0 -TOTAL : 2.593393 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.223628e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.400190e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.400190e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.211334 sec INFO: No Floating Point Exceptions have been reported - 9,079,136,062 cycles # 3.498 GHz - 21,966,157,575 instructions # 2.42 insn per cycle - 2.595953355 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2: 0) (512y: 0) (512z: 0) + 9,244,093,548 cycles # 2.875 GHz + 21,907,620,538 instructions # 2.37 insn per cycle + 3.216400901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.413795e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.060357e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.060357e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 2.402651 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.419643e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.716778e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716778e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.967957 sec INFO: No Floating Point Exceptions have been reported - 8,148,048,804 cycles # 3.389 GHz - 15,492,987,780 instructions # 1.90 insn per cycle - 2.404822845 seconds time elapsed + 8,316,472,651 cycles # 2.798 GHz + 15,592,546,873 instructions # 1.87 insn per cycle + 2.972957823 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.440410e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.116576e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.116576e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 2.402030 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.415944e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.716005e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716005e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.973842 sec INFO: No Floating Point Exceptions have been reported - 8,153,583,023 cycles # 3.391 GHz - 15,132,463,188 instructions # 1.86 insn per cycle - 2.404639550 seconds time elapsed + 8,315,082,395 cycles # 2.793 GHz + 15,436,266,122 instructions # 1.86 insn per cycle + 2.978955673 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.526570e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.115568e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.115568e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 1.960575 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.416813e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.677411e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.677411e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.974949 sec INFO: No Floating Point Exceptions have been reported - 6,696,963,172 cycles # 3.412 GHz - 12,564,921,515 instructions # 1.88 insn per cycle - 1.963196726 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1738) (512y: 5) (512z: 1439) + 6,610,937,423 cycles # 2.219 GHz + 12,863,752,208 instructions # 1.95 insn per cycle + 2.980091060 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index e292abf6c8..29c604a610 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,188 +1,227 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:12:25 +DATE: 2024-05-16_15:00:26 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.941162e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.420865e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.420865e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.745821 sec +INFO: No Floating Point Exceptions have been reported + 5,608,708,868 cycles # 2.838 GHz + 10,190,752,473 instructions # 1.82 insn per cycle + 2.033892232 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.571360e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.882241e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.882241e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.648614 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.055357e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.251163e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.251163e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.442554 sec INFO: No Floating Point Exceptions have been reported - 16,274,421,129 cycles # 3.499 GHz - 43,795,272,918 instructions # 2.69 insn per cycle - 4.652134666 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 18,431,262,224 cycles # 2.859 GHz + 43,659,496,470 instructions # 2.37 insn per cycle + 6.448903506 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.041129e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.508515e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.508515e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.757451 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.110619e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.160777e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.160777e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.492633 sec INFO: No Floating Point Exceptions have been reported - 9,657,450,376 cycles # 3.498 GHz - 23,301,227,828 instructions # 2.41 insn per cycle - 2.760901970 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2: 0) (512y: 0) (512z: 0) + 9,993,500,583 cycles # 2.857 GHz + 23,243,476,984 instructions # 2.33 insn per cycle + 3.498991107 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.261327e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.737341e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.737341e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.580566 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.275264e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.392372e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.392372e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.272099 sec INFO: No Floating Point Exceptions have been reported - 8,778,518,065 cycles # 3.398 GHz - 16,616,549,598 instructions # 1.89 insn per cycle - 2.583972383 seconds time elapsed + 9,005,707,266 cycles # 2.748 GHz + 16,711,349,389 instructions # 1.86 insn per cycle + 3.278668519 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.285502e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.783778e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.783778e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.577641 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.297831e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.443567e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.443567e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.244587 sec INFO: No Floating Point Exceptions have been reported - 8,769,810,281 cycles # 3.398 GHz - 16,258,423,929 instructions # 1.85 insn per cycle - 2.581094908 seconds time elapsed + 8,928,752,660 cycles # 2.747 GHz + 16,549,135,089 instructions # 1.85 insn per cycle + 3.250993607 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.225378e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.191390e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.191390e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.171708 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.241559e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.306249e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.306249e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.324488 sec INFO: No Floating Point Exceptions have been reported - 7,440,255,171 cycles # 3.422 GHz - 13,772,888,504 instructions # 1.85 insn per cycle - 2.175185973 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1738) (512y: 5) (512z: 1439) + 7,378,511,382 cycles # 2.216 GHz + 14,071,008,703 instructions # 1.91 insn per cycle + 3.330848983 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 9f5985de89..8016aaf3c8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:16:34 +DATE: 2024-05-16_15:11:30 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.314779e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.179276e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.254245e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 +TOTAL : 1.213916 sec +INFO: No Floating Point Exceptions have been reported + 4,089,914,869 cycles # 2.847 GHz + 6,594,462,327 instructions # 1.61 insn per cycle + 1.494122889 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.592846e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.910692e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910692e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.089350e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.292539e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.292539e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.548136 sec +TOTAL : 6.490729 sec INFO: No Floating Point Exceptions have been reported - 15,918,672,194 cycles # 3.498 GHz - 43,645,157,370 instructions # 2.74 insn per cycle - 4.550768739 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 18,740,706,935 cycles # 2.886 GHz + 43,689,321,367 instructions # 2.33 insn per cycle + 6.495941000 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.171803e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.793341e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.793341e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.213519e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.407340e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.407340e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.571626 sec +TOTAL : 3.565494 sec INFO: No Floating Point Exceptions have been reported - 9,002,835,519 cycles # 3.498 GHz - 21,965,803,400 instructions # 2.44 insn per cycle - 2.574182913 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2: 0) (512y: 0) (512z: 0) + 10,288,737,724 cycles # 2.883 GHz + 21,988,558,280 instructions # 2.14 insn per cycle + 3.570732391 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.395956e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.063603e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.063603e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.392103e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.656361e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.656361e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.415626 sec +TOTAL : 3.347045 sec INFO: No Floating Point Exceptions have been reported - 8,200,472,498 cycles # 3.392 GHz - 15,495,873,799 instructions # 1.89 insn per cycle - 2.418229335 seconds time elapsed + 9,294,224,919 cycles # 2.774 GHz + 15,502,535,760 instructions # 1.67 insn per cycle + 3.352354405 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe 
-p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.442464e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.112321e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.112321e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.411853e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.706271e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.706271e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.398280 sec +TOTAL : 3.334245 sec INFO: No Floating Point Exceptions have been reported - 8,137,879,411 cycles # 3.391 GHz - 15,137,769,043 instructions # 1.86 insn per cycle - 2.400787686 seconds time elapsed + 9,229,877,586 cycles # 2.765 GHz + 15,144,508,612 instructions # 1.64 insn per cycle + 3.339505215 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.518730e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.107954e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.107954e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.389642e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.623022e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.623022e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 1.966307 sec +TOTAL : 3.358884 sec INFO: No Floating Point Exceptions have been reported - 6,715,271,102 cycles # 3.411 GHz - 12,564,745,941 instructions # 1.87 insn per cycle - 1.968945583 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1738) (512y: 5) (512z: 1439) + 7,623,474,420 cycles # 2.266 GHz + 12,573,351,599 instructions # 1.65 insn per cycle + 3.364654068 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 406665424a..3bd2ee01ac 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -1,128 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:16:05 +DATE: 2024-05-16_15:08:45 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.323461e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.185145e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.269757e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.877821 sec +INFO: No Floating Point Exceptions have been reported + 3,113,911,295 cycles # 2.829 GHz + 6,352,740,713 instructions # 2.04 insn per cycle + 1.157340966 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe: Aborted - 4,006,756 cycles # 2.641 GHz - 6,337,589 instructions # 1.58 insn per cycle - 0.038820558 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.080933e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.281840e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.281840e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.198018 sec +INFO: No Floating Point Exceptions have been reported + 17,744,200,571 cycles # 2.861 GHz + 43,507,633,337 instructions # 2.45 insn per cycle + 6.203254296 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe: Aborted - 4,042,828 cycles # 2.599 GHz - 6,349,685 instructions # 1.57 insn per cycle - 0.039473651 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 2.206413e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.379725e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.379725e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.236598 sec +INFO: No Floating Point Exceptions have been reported + 9,264,626,353 cycles # 2.859 GHz + 21,909,129,569 instructions # 2.36 insn per cycle + 3.241826343 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= 
-/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe: Aborted - 3,980,821 cycles # 2.593 GHz - 6,345,323 instructions # 1.59 insn per cycle - 0.040604688 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.378945e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.622195e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.622195e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.023073 sec +INFO: No Floating Point Exceptions have been reported + 8,285,944,372 cycles # 2.737 GHz + 15,591,046,995 instructions # 1.88 insn per cycle + 3.028351636 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe: Aborted - 3,992,443 cycles # 2.637 GHz - 6,345,359 instructions # 1.59 insn per cycle - 0.039143335 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.406944e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
3.689990e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.689990e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.991499 sec +INFO: No Floating Point Exceptions have been reported + 8,215,974,247 cycles # 2.742 GHz + 15,434,394,808 instructions # 1.88 insn per cycle + 2.996736921 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe: Aborted - 
3,962,677 cycles # 2.635 GHz - 6,339,192 instructions # 1.60 insn per cycle - 0.040340396 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1738) (512y: 5) (512z: 1439) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.365448e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.582442e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.582442e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.039216 sec +INFO: No Floating Point Exceptions have been reported + 6,609,003,865 cycles # 2.172 GHz + 12,863,939,056 instructions # 1.95 insn per cycle + 3.044504803 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index fed66850ae..ef6806658f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,183 +1,216 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:14:58 +DATE: 2024-05-16_15:06:01 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.717298e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.145539e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143407e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.533112 sec +INFO: No Floating Point Exceptions have been reported + 4,980,418,158 cycles # 2.839 GHz + 9,119,342,139 instructions # 1.83 insn per cycle + 1.812784805 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.589425e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.907636e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.907636e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.558643 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.069115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.265540e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.265540e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.262605 sec INFO: No Floating Point Exceptions have been reported - 15,933,699,286 cycles # 3.494 GHz - 43,645,740,209 instructions # 2.74 insn per cycle - 4.561263957 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 17,921,464,120 cycles # 2.860 GHz + 43,508,155,770 instructions # 2.43 insn per cycle + 6.267871711 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.166265e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.789294e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.789294e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.576762 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.205993e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.384022e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.384022e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.237394 sec INFO: No Floating Point Exceptions have been reported - 9,021,421,888 cycles # 3.498 GHz - 21,965,682,086 instructions # 2.43 insn per cycle - 2.579422110 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2: 0) (512y: 0) (512z: 0) + 9,270,351,681 cycles # 2.860 GHz + 21,907,147,046 instructions # 2.36 insn per cycle + 3.242634203 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.410624e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.067194e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.067194e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.407835 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.379256e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.625966e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.625966e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.019248 sec INFO: No Floating Point Exceptions have been reported - 8,170,522,097 cycles # 3.390 GHz - 15,495,739,902 instructions # 1.90 insn per cycle - 2.410473326 seconds time elapsed + 8,297,873,717 cycles # 2.744 GHz + 15,590,905,283 instructions # 1.88 insn per cycle + 3.024598202 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.440773e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.107588e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.107588e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.400218 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.398987e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.671948e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.671948e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.000824 sec INFO: No Floating Point Exceptions have been reported - 8,147,236,501 cycles # 3.391 GHz - 15,137,712,310 instructions # 1.86 insn per cycle - 2.402812237 seconds time elapsed + 8,223,361,227 cycles # 2.736 GHz + 15,434,590,756 instructions # 1.88 insn per cycle + 3.006023707 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.529243e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.119603e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.119603e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 1.960262 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.360241e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.570739e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.570739e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.046199 sec INFO: No Floating Point Exceptions have been reported - 6,697,446,385 cycles # 3.413 GHz - 12,564,565,403 instructions # 1.88 insn per cycle - 1.962895171 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1738) (512y: 5) (512z: 1439) + 6,615,724,908 cycles # 2.169 GHz + 12,863,710,849 instructions # 1.94 insn per cycle + 3.051492012 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index f38625daf9..b613786442 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:55:04 +DATE: 2024-05-16_14:33:39 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.604585e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.336536e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.343718e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.571057 sec +INFO: No Floating Point Exceptions have been reported + 2,242,744,669 cycles # 2.822 GHz + 3,531,920,926 instructions # 1.57 insn per cycle + 0.851832101 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 
-EvtsPerSec[Rmb+ME] (23) = ( 1.718462e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.096695e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.096695e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.259405 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.153085e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.384746e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.384746e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 5.830325 sec INFO: No Floating Point Exceptions have been reported - 14,906,619,571 cycles # 3.498 GHz - 41,403,712,152 instructions # 2.78 insn per cycle - 4.261977624 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 377) (avx2: 0) (512y: 0) (512z: 0) + 16,691,813,815 cycles # 2.861 GHz + 41,266,181,474 instructions # 2.47 insn per cycle + 5.835359179 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.281651e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.052640e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.052640e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 
3.270266e-06 ) GeV^0 -TOTAL : 2.507101 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.304367e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.587722e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.587722e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.109672 sec INFO: No Floating Point Exceptions have been reported - 8,776,093,343 cycles # 3.497 GHz - 21,269,495,730 instructions # 2.42 insn per cycle - 2.509732736 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1845) (avx2: 0) (512y: 0) (512z: 0) + 8,995,426,679 cycles # 2.889 GHz + 21,211,089,826 instructions # 2.36 insn per cycle + 3.114839321 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1843) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.439328e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.118918e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.118918e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 2.392823 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.420820e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.716893e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716893e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.969606 sec INFO: No Floating Point Exceptions have been reported - 8,121,995,124 cycles # 3.391 GHz - 15,330,104,720 instructions # 1.89 insn per cycle - 2.395419581 seconds time elapsed + 8,272,952,138 cycles # 2.782 GHz + 15,425,102,157 instructions # 1.86 insn per cycle + 2.974640700 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2537) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.462402e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.159225e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.159225e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 2.389124 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.475869e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.854556e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.854556e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.912175 sec INFO: No Floating Point Exceptions have been reported - 8,111,785,966 cycles # 3.391 GHz - 14,942,135,580 instructions # 1.84 insn per cycle - 2.392160016 seconds time elapsed + 8,117,590,540 cycles # 2.783 GHz + 15,233,342,033 instructions # 1.88 insn per cycle + 2.917189383 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.526160e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.104377e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.104377e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 1.963768 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.412788e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.668874e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.668874e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.982256 sec INFO: No Floating Point Exceptions have been reported - 6,690,632,313 cycles # 3.407 GHz - 12,541,712,444 instructions # 1.87 insn per cycle - 1.965941647 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1708) (512y: 6) (512z: 1427) + 6,592,409,084 cycles # 2.208 GHz + 12,843,659,599 instructions # 1.95 insn per cycle + 2.987368722 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1706) (512y: 18) (512z: 1427) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052564145764E-002 Relative difference = 1.9988585667912256e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 79dfc35e39..e6d46e5965 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:07:00 +DATE: 2024-05-16_14:51:09 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.307514e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.195447e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.293637e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.586214 sec +INFO: No Floating Point Exceptions have been reported + 2,288,759,129 cycles # 2.822 GHz + 3,563,945,826 instructions # 1.56 insn per cycle + 0.869586754 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 
-EvtsPerSec[Rmb+ME] (23) = ( 2.226642e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.906339e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.906339e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 3.422836 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.594420e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.075669e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.075669e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 4.325231 sec INFO: No Floating Point Exceptions have been reported - 11,973,671,245 cycles # 3.497 GHz - 32,591,128,424 instructions # 2.72 insn per cycle - 3.425270504 seconds time elapsed + 12,164,411,956 cycles # 2.810 GHz + 32,427,707,417 instructions # 2.67 insn per cycle + 4.330470336 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039840314887E-002 Relative difference = 1.244813035273009e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.726722e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.217994e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.217994e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 
3.270266e-06 ) GeV^0 -TOTAL : 2.277045 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.607791e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.429113e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.429113e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.796823 sec INFO: No Floating Point Exceptions have been reported - 7,973,162,968 cycles # 3.498 GHz - 18,723,001,222 instructions # 2.35 insn per cycle - 2.279715985 seconds time elapsed + 8,009,111,157 cycles # 2.859 GHz + 18,657,618,704 instructions # 2.33 insn per cycle + 2.802139139 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1555) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039283704129E-002 Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.859329e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.134184e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.134184e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 2.190870 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.719759e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.472647e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.472647e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.690741 sec INFO: No Floating Point Exceptions have been reported - 7,454,401,193 cycles # 3.399 GHz - 14,160,226,764 instructions # 1.90 insn per cycle - 2.193513000 seconds time elapsed + 7,416,864,109 cycles # 2.752 GHz + 14,251,974,045 instructions # 1.92 insn per cycle + 2.696083346 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053244447801E-002 Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.090151e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.687076e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.687076e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 2.109079 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.774877e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.630440e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.630440e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.645076 sec INFO: No Floating Point Exceptions have been reported - 7,374,925,228 cycles # 3.493 GHz - 13,649,605,338 instructions # 1.85 insn per cycle - 2.111734544 seconds time elapsed + 7,291,130,406 cycles # 2.752 GHz + 13,948,384,567 instructions # 1.91 insn per cycle + 2.650598467 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053244447801E-002 Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.513682e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.047803e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.047803e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 1.965310 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.434115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.741003e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.741003e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.962177 sec INFO: No Floating Point Exceptions have been reported - 6,714,526,284 cycles # 3.412 GHz - 13,128,070,975 instructions # 1.96 insn per cycle - 1.967953731 seconds time elapsed + 6,479,327,720 cycles # 2.184 GHz + 13,423,401,797 instructions # 2.07 insn per cycle + 2.967420151 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052562326775E-002 Relative difference = 1.997440588685788e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 4d1949ef99..1851f3246c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:07:14 +DATE: 2024-05-16_14:51:33 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.308899e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.200904e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.329787e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.585570 sec +INFO: No Floating Point Exceptions have been reported + 2,293,480,451 cycles # 2.820 GHz + 3,552,176,680 instructions # 1.55 insn per cycle + 0.870100804 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 
-EvtsPerSec[Rmb+ME] (23) = ( 3.050290e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.497444e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.497444e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 2.660655 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.129885e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.080551e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.080551e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.330346 sec INFO: No Floating Point Exceptions have been reported - 9,305,990,245 cycles # 3.495 GHz - 25,428,917,364 instructions # 2.73 insn per cycle - 2.663311168 seconds time elapsed + 9,425,530,261 cycles # 2.826 GHz + 25,263,309,757 instructions # 2.68 insn per cycle + 3.335509619 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039838495897E-002 Relative difference = 1.2589928273811243e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.336793e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.087513e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.087513e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 
3.270266e-06 ) GeV^0 -TOTAL : 2.041698 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.953227e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.493970e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.493970e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.514743 sec INFO: No Floating Point Exceptions have been reported - 7,143,134,191 cycles # 3.495 GHz - 16,934,497,640 instructions # 2.37 insn per cycle - 2.044415863 seconds time elapsed + 7,199,101,915 cycles # 2.858 GHz + 16,870,111,415 instructions # 2.34 insn per cycle + 2.520226033 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.301147e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.295276e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.295276e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 2.025295 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.869871e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.903620e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.903620e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.568153 sec INFO: No Floating Point Exceptions have been reported - 7,078,862,679 cycles # 3.491 GHz - 13,524,937,850 instructions # 1.91 insn per cycle - 2.027915407 seconds time elapsed + 7,089,400,745 cycles # 2.756 GHz + 13,616,924,187 instructions # 1.92 insn per cycle + 2.573571442 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053220800939E-002 Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.391243e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.540215e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.540215e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 2.004933 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.911116e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.034757e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.034757e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.534380 sec INFO: No Floating Point Exceptions have been reported - 7,005,576,991 cycles # 3.491 GHz - 13,133,179,972 instructions # 1.87 insn per cycle - 2.007248760 seconds time elapsed + 7,042,060,221 cycles # 2.774 GHz + 13,426,671,587 instructions # 1.91 insn per cycle + 2.539847169 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053220800939E-002 Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.666850e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.557261e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.557261e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 
3.270112e-06 ) GeV^0 -TOTAL : 1.919696 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.525985e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.967300e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.967300e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.871748 sec INFO: No Floating Point Exceptions have been reported - 6,563,329,807 cycles # 3.415 GHz - 12,859,576,357 instructions # 1.96 insn per cycle - 1.922379873 seconds time elapsed + 6,325,625,286 cycles # 2.199 GHz + 13,154,721,049 instructions # 2.08 insn per cycle + 2.877120825 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052536860923E-002 Relative difference = 1.977588895209662e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index d9c97c18b0..b626a014f8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:55:19 +DATE: 2024-05-16_14:34:05 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.830025e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.944832e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.160865e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.662041 sec +INFO: No Floating Point Exceptions have been reported + 2,559,219,510 cycles # 2.861 GHz + 3,969,506,530 instructions # 1.55 insn per cycle + 0.952802853 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 
-EvtsPerSec[Rmb+ME] (23) = ( 1.378447e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.583594e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.583594e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.178071 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.044346e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.221236e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.221236e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.439706 sec INFO: No Floating Point Exceptions have been reported - 18,116,948,400 cycles # 3.497 GHz - 44,279,589,774 instructions # 2.44 insn per cycle - 5.180853981 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 441) (avx2: 0) (512y: 0) (512z: 0) + 18,648,827,254 cycles # 2.894 GHz + 44,218,351,924 instructions # 2.37 insn per cycle + 6.444755062 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.281421e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.956307e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.956307e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.366088 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.634240e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.158489e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.158489e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.270020 sec INFO: No Floating Point Exceptions have been reported - 11,777,222,914 cycles # 3.497 GHz - 30,827,112,234 instructions # 2.62 insn per cycle - 3.368562047 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1691) (avx2: 0) (512y: 0) (512z: 0) + 12,337,216,169 cycles # 2.886 GHz + 30,918,100,190 instructions # 2.51 insn per cycle + 4.275170664 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.736561e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.717816e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.717816e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.909518 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.943703e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.696046e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.696046e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.651391 sec INFO: No Floating Point Exceptions have been reported - 9,848,016,253 cycles # 3.382 GHz - 19,171,423,006 instructions # 1.95 insn per cycle - 2.912299252 seconds time elapsed + 10,097,284,751 cycles # 2.762 GHz + 19,374,074,587 instructions # 1.92 insn per cycle + 3.656592402 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2130) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.854841e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.934985e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.934985e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.797845 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.039225e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.880994e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.880994e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.498933 sec INFO: No Floating Point Exceptions have been reported - 9,472,574,926 cycles # 3.383 GHz - 18,622,411,462 instructions # 1.97 insn per cycle - 2.800624578 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1863) (512y: 188) (512z: 0) + 9,699,890,764 cycles # 2.769 GHz + 18,944,296,026 instructions # 1.95 insn per cycle + 3.504313379 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1860) (512y: 188) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.630799e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.513671e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.513671e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.322462 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.766168e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.359450e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359450e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.977432 sec INFO: No Floating Point Exceptions have been reported - 7,905,195,508 cycles # 3.400 GHz - 14,645,416,360 instructions # 1.85 insn per cycle - 2.325199959 seconds time elapsed + 8,362,626,878 cycles # 2.101 GHz + 15,058,722,791 instructions # 1.80 insn per cycle + 3.982532855 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1024) (512y: 155) (512z: 1316) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 65b806c1d2..f9780717c1 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:55:36 +DATE: 2024-05-16_14:34:35 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.831074e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.944999e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.163112e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.663702 sec +INFO: No Floating Point Exceptions have been reported + 2,550,713,530 cycles # 2.845 GHz + 3,995,712,636 instructions # 1.57 insn per cycle + 0.958037940 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 
-EvtsPerSec[Rmb+ME] (23) = ( 1.456800e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.687118e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.687118e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.935224 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.088480e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.281697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.281697e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.193535 sec INFO: No Floating Point Exceptions have been reported - 17,240,969,832 cycles # 3.495 GHz - 42,526,916,761 instructions # 2.47 insn per cycle - 4.937746362 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 423) (avx2: 0) (512y: 0) (512z: 0) + 17,967,058,694 cycles # 2.899 GHz + 42,467,805,223 instructions # 2.36 insn per cycle + 6.198684795 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.367141e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.103399e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.103399e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.265876 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.676284e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.231904e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.231904e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.170684 sec INFO: No Floating Point Exceptions have been reported - 11,424,467,425 cycles # 3.496 GHz - 30,110,081,669 instructions # 2.64 insn per cycle - 3.268663149 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1698) (avx2: 0) (512y: 0) (512z: 0) + 12,134,694,075 cycles # 2.906 GHz + 30,224,929,059 instructions # 2.49 insn per cycle + 4.175943490 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.777925e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.800468e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.800468e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.882551 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.950812e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.735198e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.735198e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.639278 sec INFO: No Floating Point Exceptions have been reported - 9,762,346,673 cycles # 3.384 GHz - 19,123,478,649 instructions # 1.96 insn per cycle - 2.885422638 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2149) (512y: 0) (512z: 0) + 10,078,657,444 cycles # 2.766 GHz + 19,257,126,653 instructions # 1.91 insn per cycle + 3.644365244 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.923402e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.059057e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.059057e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.739345 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.049769e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.898049e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.898049e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.481211 sec INFO: No Floating Point Exceptions have been reported - 9,280,284,072 cycles # 3.385 GHz - 18,421,211,222 instructions # 1.98 insn per cycle - 2.742071393 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1837) (512y: 191) (512z: 0) + 9,647,917,970 cycles # 2.768 GHz + 18,746,418,128 instructions # 1.94 insn per cycle + 3.486360008 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1834) (512y: 191) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.654222e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.566246e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.566246e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.310726 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.796433e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.409552e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.409552e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.919050 sec INFO: No Floating Point Exceptions have been reported - 7,863,763,045 cycles # 3.399 GHz - 14,563,021,097 instructions # 1.85 insn per cycle - 2.313550512 seconds time elapsed + 8,244,471,456 cycles # 2.102 GHz + 14,980,246,059 instructions # 1.82 insn per cycle + 3.924194596 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1021) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 2776c79ca9..205a4bf5b6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:55:54 +DATE: 2024-05-16_14:35:05 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.201162e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181610e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277713e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.525559 sec +INFO: No Floating Point Exceptions have been reported + 2,155,305,398 cycles # 2.849 GHz + 3,120,666,963 instructions # 1.45 insn per cycle + 0.814520269 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.789464e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.864452e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.864452e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.886517 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.068773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.129905e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129905e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.176250 sec INFO: No Floating Point Exceptions have been reported - 13,600,504,995 cycles # 3.497 GHz - 38,380,906,031 instructions # 2.82 insn per cycle - 3.889317912 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 674) (avx2: 0) (512y: 0) (512z: 0) + 15,001,077,825 cycles # 2.896 GHz + 38,374,710,401 instructions # 2.56 insn per cycle + 5.181415080 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.845691e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.093226e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.093226e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.290221 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.492980e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.684039e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.684039e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.109528 sec INFO: No Floating Point Exceptions have been reported - 8,019,541,175 cycles # 3.498 GHz - 24,571,068,753 instructions # 3.06 insn per cycle - 2.293071476 seconds time elapsed + 9,049,547,879 cycles # 2.906 GHz + 24,578,150,431 instructions # 2.72 insn per cycle + 3.114795475 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.867637e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.519099e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.519099e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.460379 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.554648e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.034559e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.034559e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.996517 sec INFO: No Floating Point Exceptions have been reported - 4,872,147,779 cycles # 3.330 GHz - 11,227,733,615 instructions # 2.30 insn per cycle - 1.463257220 seconds time elapsed + 5,443,502,791 cycles # 2.721 GHz + 11,251,469,346 instructions # 2.07 insn per cycle + 2.001703471 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.110279e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.793548e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.793548e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.415836 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.119114e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.713742e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.713742e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.821745 sec INFO: No Floating Point Exceptions have been reported - 4,723,686,789 cycles # 3.330 GHz - 10,632,343,497 instructions # 2.25 insn per cycle - 1.418646127 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2125) (512y: 124) (512z: 0) + 4,960,408,882 cycles # 2.716 GHz + 10,558,806,229 instructions # 2.13 insn per cycle + 1.826903839 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.311347e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.018241e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.018241e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.384756 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.693426e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.898518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.898518e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.947911 sec INFO: No Floating Point Exceptions have been reported - 4,617,023,596 cycles # 3.328 GHz - 7,671,857,273 instructions # 1.66 insn per cycle - 1.387628052 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1540) + 5,367,244,097 cycles # 1.818 GHz + 7,793,958,391 instructions # 1.45 insn per cycle + 2.953294554 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 9f614d2e92..4b2366d44f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,188 +1,227 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:12:41 +DATE: 2024-05-16_15:00:55 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.373758e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.924060e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.924060e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.825705 sec +INFO: No Floating Point Exceptions have been reported + 3,037,157,201 cycles # 2.832 GHz + 4,768,877,833 instructions # 1.57 insn per cycle + 1.128818887 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.779713e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.854203e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.854203e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.941033 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.032947e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.092197e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.092197e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.344113 sec INFO: No Floating Point Exceptions have been reported - 13,799,747,574 cycles # 3.498 GHz - 38,446,512,754 instructions # 2.79 insn per cycle - 3.944972423 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 674) (avx2: 0) (512y: 0) (512z: 0) + 15,315,317,736 cycles # 2.863 GHz + 38,433,762,310 instructions # 2.51 insn per cycle + 5.351126978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.784034e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.025706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.025706e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.374177 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.394451e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.578816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.578816e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.279010 sec INFO: No Floating Point Exceptions have been reported - 8,313,425,452 cycles # 3.496 GHz - 24,750,069,495 instructions # 2.98 insn per cycle - 2.378016368 seconds time elapsed + 9,390,215,737 cycles # 2.859 GHz + 24,761,602,813 instructions # 2.64 insn per cycle + 3.285914811 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.721964e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.347312e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.347312e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.539706 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.346272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.804430e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.804430e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.151067 sec INFO: No Floating Point Exceptions have been reported - 5,151,677,249 cycles # 3.338 GHz - 11,509,930,615 instructions # 2.23 insn per cycle - 1.543538979 seconds time elapsed + 5,795,064,676 cycles # 2.687 GHz + 11,538,955,643 instructions # 1.99 insn per cycle + 2.157987463 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.929625e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.588534e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.588534e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.503019 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.949125e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.512113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.512113e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.953091 sec INFO: No Floating Point Exceptions have been reported - 5,031,273,010 cycles # 3.339 GHz - 10,912,509,093 instructions # 2.17 insn per cycle - 1.506915320 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2125) (512y: 124) (512z: 0) + 5,277,608,562 cycles # 2.695 GHz + 10,845,633,589 instructions # 2.06 insn per cycle + 1.960046746 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.182703e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.868825e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.868825e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.462204 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.545325e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.736253e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736253e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.149111 sec INFO: No Floating Point Exceptions have been reported - 4,894,621,540 cycles # 3.339 GHz - 7,911,118,728 instructions # 1.62 insn per cycle - 1.466076132 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1540) + 5,725,568,726 cycles # 1.815 GHz + 8,037,864,149 instructions # 1.40 insn per cycle + 3.156036160 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 7960bd4fc7..66fdf9efe4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:16:49 +DATE: 2024-05-16_15:11:59 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.582142e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.158915e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274993e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 0.629579 sec +INFO: No Floating Point Exceptions have been reported + 2,438,671,292 cycles # 2.828 GHz + 3,557,518,240 instructions # 1.46 insn per cycle + 0.918692112 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.792705e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.867470e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.867470e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.063642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.124319e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.124319e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.882179 sec +TOTAL : 5.250994 sec INFO: No Floating Point Exceptions have been reported - 13,590,452,132 cycles # 3.499 GHz - 38,381,415,159 instructions # 2.82 insn per cycle - 3.884999642 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 674) (avx2: 0) (512y: 0) (512z: 0) + 15,177,224,624 cycles # 2.888 GHz + 38,389,589,114 instructions # 2.53 insn per cycle + 5.256694767 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.818661e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.062840e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.062840e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.469246e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.659787e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.659787e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.302066 sec +TOTAL : 3.192043 sec INFO: No Floating Point Exceptions have been reported - 8,054,845,661 cycles # 3.495 GHz - 24,571,174,273 instructions # 3.05 insn per cycle - 2.304828748 seconds time elapsed + 9,234,869,625 cycles # 2.889 GHz + 24,577,322,685 instructions # 2.66 insn per cycle + 3.197667860 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.811961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.456700e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.456700e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.520662e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.999169e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.999169e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.470660 sec +TOTAL : 2.069942 sec INFO: No Floating Point Exceptions have been reported - 4,906,328,782 cycles # 3.330 GHz - 11,227,802,708 instructions # 2.29 insn per cycle - 1.473487930 seconds time elapsed + 5,642,462,557 cycles # 2.720 GHz + 11,233,692,701 instructions # 1.99 insn per cycle + 2.075542898 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.128991e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.819245e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.819245e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.151383e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.740134e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 6.740134e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.414626 sec +TOTAL : 1.875924 sec INFO: No Floating Point Exceptions have been reported - 4,719,736,167 cycles # 3.330 GHz - 10,630,404,841 instructions # 2.25 insn per cycle - 1.417503853 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2125) (512y: 124) (512z: 0) + 5,122,190,825 cycles # 2.724 GHz + 10,508,387,782 instructions # 2.05 insn per cycle + 1.881606947 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.329901e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.039190e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.039190e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.617306e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.815381e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.815381e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.381867 sec +TOTAL : 3.070946 sec INFO: No Floating Point Exceptions have been reported - 4,609,836,466 cycles # 3.330 GHz - 7,671,855,782 instructions # 1.66 insn per cycle - 1.384694427 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1540) + 5,582,158,144 cycles # 1.816 GHz + 7,742,870,902 instructions # 1.39 insn per cycle + 3.076599052 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index 8060d197b3..82194f6fe3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -1,128 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:16:07 +DATE: 2024-05-16_15:09:12 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.587611e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.161872e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276844e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.568860 sec +INFO: No Floating Point Exceptions have been reported + 2,269,706,021 cycles # 2.822 GHz + 3,484,022,632 instructions # 1.54 insn per cycle + 0.860923648 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe: Aborted - 3,905,079 cycles # 2.680 GHz - 6,285,573 instructions # 1.61 insn per cycle - 0.039944283 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 674) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.045564e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.105865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.105865e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.235402 sec +INFO: No Floating Point Exceptions have been reported + 14,997,948,844 cycles # 2.862 GHz + 38,373,416,469 instructions # 2.56 insn per cycle + 5.240872993 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 
--curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe: Aborted - 3,881,664 cycles # 2.680 GHz - 6,304,668 instructions # 1.62 insn per cycle - 0.038823519 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.444491e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.632712e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.632712e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.153505 sec +INFO: No Floating Point Exceptions have been reported + 9,049,779,346 cycles # 2.866 GHz + 24,577,971,625 instructions # 2.72 insn per cycle + 3.158944927 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe: Aborted - 4,012,821 cycles # 3.252 GHz - 6,293,421 instructions # 1.57 insn per cycle - 0.038405470 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.437117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.904229e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
5.904229e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.038232 sec +INFO: No Floating Point Exceptions have been reported + 5,473,582,641 cycles # 2.680 GHz + 11,251,858,191 instructions # 2.06 insn per cycle + 2.043714380 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe: Aborted - 4,075,981 cycles # 3.242 GHz - 6,297,027 instructions # 1.54 insn per cycle - 0.038295092 seconds time elapsed -=Symbols 
in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2125) (512y: 124) (512z: 0) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.066218e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.650713e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.650713e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.837947 sec +INFO: No Floating Point Exceptions have been reported + 4,942,309,563 cycles # 2.682 GHz + 10,557,200,123 instructions # 2.14 insn per cycle + 1.844865568 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe: Aborted - 3,966,761 cycles # 3.256 GHz - 6,291,754 instructions # 1.59 insn per cycle - 0.038747033 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1540) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.598977e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.794496e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.794496e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.023039 sec +INFO: No Floating Point Exceptions have been reported + 5,367,715,100 cycles # 1.773 GHz + 7,793,769,749 instructions # 1.45 insn per cycle + 3.028517366 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index bc2e3a2d1d..3db0a99453 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,183 +1,216 @@ -Building in 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:15:13 +DATE: 2024-05-16_15:06:28 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.591450e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.156507e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275190e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.728834 sec +INFO: No Floating Point Exceptions have been reported + 2,711,621,820 cycles # 2.826 GHz + 4,288,575,941 instructions # 1.58 insn per cycle + 1.017933550 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.810618e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.886696e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.886696e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.858525 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.045774e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.105988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.105988e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.234926 sec INFO: No Floating Point Exceptions have been reported - 13,499,659,763 cycles # 3.496 GHz - 38,381,595,588 instructions # 2.84 insn per cycle - 3.861871026 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 674) (avx2: 0) (512y: 0) (512z: 0) + 14,996,539,700 cycles # 2.862 GHz + 38,373,492,139 instructions # 2.56 insn per cycle + 5.240540958 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515645 Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.842479e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.089616e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.089616e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.292785 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.431303e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.617753e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617753e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.165641 sec INFO: No Floating Point Exceptions have been reported - 8,029,889,332 cycles # 3.498 GHz - 24,570,970,084 instructions # 3.06 insn per cycle - 2.295676801 seconds time elapsed + 9,072,261,960 cycles # 2.862 GHz + 24,578,342,604 instructions # 2.71 insn per cycle + 3.171145800 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.854323e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.508345e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.508345e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.463664 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.460196e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.936686e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.936686e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.030403 sec INFO: No Floating Point Exceptions have been reported - 4,881,173,354 cycles # 3.329 GHz - 11,227,429,391 instructions # 2.30 insn per cycle - 1.466667703 seconds time elapsed + 5,452,336,471 cycles # 2.679 GHz + 11,251,160,510 instructions # 2.06 insn per cycle + 2.035938093 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.108854e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.793319e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.793319e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.416234 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.063893e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.649981e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.649981e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.838982 sec INFO: No Floating Point Exceptions have been reported - 4,724,507,539 cycles # 3.330 GHz - 10,632,467,883 instructions # 2.25 insn per cycle - 1.419048077 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2125) (512y: 124) (512z: 0) + 4,938,631,038 cycles # 2.680 GHz + 10,556,930,414 instructions # 2.14 insn per cycle + 1.844618889 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.341696e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.055467e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.055467e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.380333 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.589787e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.785615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.785615e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.030446 sec INFO: No Floating Point Exceptions have been reported - 4,605,178,871 cycles # 3.330 GHz - 7,671,819,434 instructions # 1.67 insn per cycle - 1.383249425 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1540) + 5,385,276,295 cycles # 1.774 GHz + 7,793,583,016 instructions # 1.45 insn per cycle + 3.036161028 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 96b6cb5ddc..0caf1293cf 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:56:06 +DATE: 2024-05-16_14:35:28 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.206695e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.183658e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279171e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.521961 sec +INFO: No Floating Point Exceptions have been reported + 2,148,802,757 cycles # 2.845 GHz + 3,054,152,486 instructions # 1.42 insn per cycle + 0.812117976 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.723912e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.795203e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.795203e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.977781 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.068168e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.129039e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129039e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.177399 sec INFO: No Floating Point Exceptions have been reported - 13,919,226,237 cycles # 3.497 GHz - 40,192,452,967 instructions # 2.89 insn per cycle - 3.980522127 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 687) (avx2: 0) (512y: 0) (512z: 0) + 15,011,872,798 cycles # 2.897 GHz + 40,100,761,049 instructions # 2.67 insn per cycle + 5.182501125 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.103438e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.377235e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.377235e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.180803 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.634343e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.844834e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.844834e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.993727 sec INFO: No Floating Point Exceptions have been reported - 7,633,753,795 cycles # 3.496 GHz - 23,664,117,605 instructions # 3.10 insn per cycle - 2.183625692 seconds time elapsed + 8,671,029,072 cycles # 2.892 GHz + 23,670,969,931 instructions # 2.73 insn per cycle + 2.999072752 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2072) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.032470e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.549227e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.549227e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.619630 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.945254e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.323667e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.323667e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.228700 sec INFO: No Floating Point Exceptions have been reported - 5,396,970,498 cycles # 3.327 GHz - 13,039,165,073 instructions # 2.42 insn per cycle - 1.622514788 seconds time elapsed + 6,081,438,462 cycles # 2.724 GHz + 13,061,002,322 instructions # 2.15 insn per cycle + 2.233958089 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.285369e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.833132e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.833132e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.562859 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.205594e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.622405e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.622405e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.121856 sec INFO: No Floating Point Exceptions have been reported - 5,207,784,461 cycles # 3.326 GHz - 12,264,767,613 instructions # 2.36 insn per cycle - 1.566117430 seconds time elapsed + 5,798,891,312 cycles # 2.727 GHz + 12,319,969,769 instructions # 2.12 insn per cycle + 2.127030294 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2093) (512y: 294) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.933129e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.586264e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.586264e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.445504 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.380432e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.550251e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.550251e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.209519 sec INFO: No Floating Point Exceptions have been reported - 4,806,228,356 cycles # 3.319 GHz - 9,536,324,378 instructions # 1.98 insn per cycle - 1.448384054 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1520) (512y: 199) (512z: 1970) + 5,821,355,640 cycles # 1.812 GHz + 9,603,981,726 instructions # 1.65 insn per cycle + 3.214724733 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1971) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index dddeeae07d..6af05ea7e1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:07:26 +DATE: 2024-05-16_14:51:55 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.681198e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166116e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276872e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.529611 sec +INFO: No Floating Point Exceptions have been reported + 2,190,477,637 cycles # 2.832 GHz + 3,135,955,530 instructions # 1.43 insn per cycle + 0.830299558 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.198908e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.297709e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.297709e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.405685 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.383572e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.466296e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.466296e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.507918 sec INFO: No Floating Point Exceptions have been reported - 11,921,831,379 cycles # 3.498 GHz - 34,393,959,805 instructions # 2.88 insn per cycle - 3.408627699 seconds time elapsed + 13,013,442,526 cycles # 2.884 GHz + 34,387,029,075 instructions # 2.64 insn per cycle + 4.513459426 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.725883e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.870404e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.870404e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.940621 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.946707e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.083881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.083881e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.667816 sec INFO: No Floating Point Exceptions have been reported - 10,298,753,385 cycles # 3.499 GHz - 24,000,547,193 instructions # 2.33 insn per cycle - 2.943559898 seconds time elapsed + 10,591,846,077 cycles # 2.884 GHz + 24,007,245,790 instructions # 2.27 insn per cycle + 3.673406920 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.008721e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.379316e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.379316e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.872348 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.532632e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.849376e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.849376e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.423240 sec INFO: No Floating Point Exceptions have been reported - 6,230,740,888 cycles # 3.323 GHz - 12,378,068,788 instructions # 1.99 insn per cycle - 1.875290821 seconds time elapsed + 6,577,855,979 cycles # 2.709 GHz + 12,401,365,684 instructions # 1.89 insn per cycle + 2.428791768 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3154) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516200 Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.541455e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.984945e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.984945e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.728378 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.754457e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.104775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.104775e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.314834 sec INFO: No Floating Point Exceptions have been reported - 5,754,725,980 cycles # 3.325 GHz - 11,517,937,425 instructions # 2.00 insn per cycle - 1.731372509 seconds time elapsed + 6,233,998,487 cycles # 2.688 GHz + 11,576,068,199 instructions # 1.86 insn per cycle + 2.320534715 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516200 Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.969262e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.630621e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.630621e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.439347 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.687851e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.893233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.893233e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.952132 sec INFO: No Floating Point Exceptions have been reported - 4,794,037,689 cycles # 3.325 GHz - 9,235,777,391 instructions # 1.93 insn per cycle - 1.442310458 seconds time elapsed + 5,323,772,693 cycles # 1.802 GHz + 9,296,912,008 instructions # 1.75 insn per cycle + 2.957828928 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2115) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 848c11f2a0..2040ec21eb 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:07:38 +DATE: 2024-05-16_14:52:19 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.680230e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168644e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280417e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.532171 sec +INFO: No Floating Point Exceptions have been reported + 2,169,507,018 cycles # 2.828 GHz + 3,115,355,964 instructions # 1.44 insn per cycle + 0.826043020 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.388100e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.498766e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.498766e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.223593 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.524819e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.617052e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.617052e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.262483 sec INFO: No Floating Point Exceptions have been reported - 11,284,349,168 cycles # 3.498 GHz - 35,046,689,761 instructions # 3.11 insn per cycle - 3.226554607 seconds time elapsed + 12,358,560,610 cycles # 2.896 GHz + 35,037,446,637 instructions # 2.84 insn per cycle + 4.268207887 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.701373e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.844016e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.844016e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.958949 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.908483e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.040450e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.040450e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.714757 sec INFO: No Floating Point Exceptions have been reported - 10,361,253,789 cycles # 3.499 GHz - 23,077,337,760 instructions # 2.23 insn per cycle - 2.961886589 seconds time elapsed + 10,745,562,014 cycles # 2.889 GHz + 23,084,374,218 instructions # 2.15 insn per cycle + 3.720383315 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.561160e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.005948e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.005948e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.725333 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.878271e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.246530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.246530e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.258864 sec INFO: No Floating Point Exceptions have been reported - 5,745,944,141 cycles # 3.325 GHz - 11,933,253,629 instructions # 2.08 insn per cycle - 1.728282440 seconds time elapsed + 6,151,591,588 cycles # 2.717 GHz + 11,956,808,073 instructions # 1.94 insn per cycle + 2.264473200 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2509) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.836858e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.318369e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.318369e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.657487 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.958079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.345089e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.345089e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.224234 sec INFO: No Floating Point Exceptions have been reported - 5,521,172,266 cycles # 3.326 GHz - 11,071,234,458 instructions # 2.01 insn per cycle - 1.660417251 seconds time elapsed + 6,017,653,055 cycles # 2.700 GHz + 11,128,128,624 instructions # 1.85 insn per cycle + 2.229785356 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2126) (512y: 174) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.306028e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.015563e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.015563e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.385953 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.739650e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.951827e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.951827e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.913360 sec INFO: No Floating Point Exceptions have been reported - 4,617,200,796 cycles # 3.325 GHz - 8,960,232,192 instructions # 1.94 insn per cycle - 1.388841142 seconds time elapsed + 5,212,798,448 cycles # 1.786 GHz + 9,020,884,070 instructions # 1.73 insn per cycle + 2.919040069 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1650) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 1a23ff636c..93f412dad4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:56:18 +DATE: 2024-05-16_14:35:52 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.088595e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.705968e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.969781e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.482195 sec +INFO: No Floating Point Exceptions have been reported + 2,007,920,858 cycles # 2.849 GHz + 2,840,933,430 instructions # 1.41 insn per cycle + 0.763422225 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.051088e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.152005e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.152005e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.552408 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.200574e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.271569e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.271569e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.850875 sec INFO: No Floating Point Exceptions have been reported - 12,424,102,263 cycles # 3.496 GHz - 38,259,948,535 instructions # 3.08 insn per cycle - 3.554812663 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 589) (avx2: 0) (512y: 0) (512z: 0) + 14,073,569,281 cycles # 2.899 GHz + 38,343,239,881 instructions # 2.72 insn per cycle + 4.855897587 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.542346e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.036288e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.036288e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.717374 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.925449e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.332953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.332953e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.217076 sec INFO: No Floating Point Exceptions have been reported - 6,013,460,590 cycles # 3.497 GHz - 15,822,230,782 instructions # 2.63 insn per cycle - 1.719941893 seconds time elapsed + 6,436,588,824 cycles # 2.899 GHz + 15,815,821,412 instructions # 2.46 insn per cycle + 2.222049918 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.245149e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.415618e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.415618e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 0.951492 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.963004e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.029520e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.029520e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.256665 sec INFO: No Floating Point Exceptions have been reported - 3,185,224,100 cycles # 3.340 GHz - 7,575,013,217 instructions # 2.38 insn per cycle - 0.954091020 seconds time elapsed + 3,455,760,948 cycles # 2.740 GHz + 7,593,976,565 instructions # 2.20 insn per cycle + 1.261861875 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.281797e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.465090e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.465090e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 0.929151 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.569986e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.110539e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110539e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.182427 sec INFO: No Floating Point Exceptions have been reported - 3,111,746,449 cycles # 3.341 GHz - 7,223,059,934 instructions # 2.32 insn per cycle - 0.931750183 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2904) (512y: 3) (512z: 0) + 3,244,770,474 cycles # 2.734 GHz + 7,203,559,407 instructions # 2.22 insn per cycle + 1.187623854 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.443829e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.680917e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.680917e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 0.836985 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.864494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.605662e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.605662e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.614546 sec INFO: No Floating Point Exceptions have been reported - 2,808,031,651 cycles # 3.346 GHz - 5,761,002,673 instructions # 2.05 insn per cycle - 0.839577856 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2371) (512y: 0) (512z: 1888) + 3,050,749,421 cycles # 1.885 GHz + 5,835,755,685 instructions # 1.91 insn per cycle + 1.619564037 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 06dd9166f9..426db838d7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,188 +1,227 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:12:53 +DATE: 2024-05-16_15:01:19 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.801236e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.462846e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.462846e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.684862 sec +INFO: No Floating Point Exceptions have been reported + 2,586,573,508 cycles # 2.828 GHz + 4,016,406,941 instructions # 1.55 insn per cycle + 0.971565490 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.049014e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.149704e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.149704e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.575558 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.176436e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.247449e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.247449e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.948763 sec INFO: No Floating Point Exceptions have been reported - 12,521,485,093 cycles # 3.499 GHz - 38,305,059,348 instructions # 3.06 insn per cycle - 3.579105569 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 589) (avx2: 0) (512y: 0) (512z: 0) + 14,176,104,430 cycles # 2.862 GHz + 38,383,843,895 instructions # 2.71 insn per cycle + 4.955194603 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.491526e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.976028e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.976028e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.760872 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.809798e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.200764e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.200764e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.315849 sec INFO: No Floating Point Exceptions have been reported - 6,169,634,576 cycles # 3.497 GHz - 16,102,875,657 instructions # 2.61 insn per cycle - 1.764343930 seconds time elapsed + 6,633,418,276 cycles # 2.858 GHz + 16,095,968,093 instructions # 2.43 insn per cycle + 2.322298973 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.226753e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.391987e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.391987e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 0.997271 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.679036e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.925640e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.925640e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.342355 sec INFO: No Floating Point Exceptions have been reported - 3,349,146,328 cycles # 3.348 GHz - 7,812,668,287 instructions # 2.33 insn per cycle - 1.000767699 seconds time elapsed + 3,640,592,514 cycles # 2.701 GHz + 7,831,268,120 instructions # 2.15 insn per cycle + 1.348786146 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.260335e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.433565e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.433565e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 0.976826 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.163700e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.056629e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056629e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.278871 sec INFO: No Floating Point Exceptions have been reported - 3,282,049,179 cycles # 3.348 GHz - 7,462,210,417 instructions # 2.27 insn per cycle - 0.980420198 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2904) (512y: 3) (512z: 0) + 3,437,646,895 cycles # 2.676 GHz + 7,439,842,858 instructions # 2.16 insn per cycle + 1.285386542 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.408011e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.634578e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.634578e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 0.890705 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.597215e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.292791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.292791e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.727252 sec INFO: No Floating Point Exceptions have been reported - 2,994,251,807 cycles # 3.349 GHz - 6,016,187,326 instructions # 2.01 insn per cycle - 0.894265671 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2371) (512y: 0) (512z: 1888) + 3,258,697,081 cycles # 1.881 GHz + 6,089,840,836 instructions # 1.87 insn per cycle + 1.733818978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 39a743ab58..884891874e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:17:01 +DATE: 2024-05-16_15:12:23 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.468958e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.648278e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.971571e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 +TOTAL : 0.575797 sec +INFO: No Floating Point Exceptions have been reported + 2,271,357,910 cycles # 2.845 GHz + 3,342,640,625 instructions # 1.47 insn per cycle + 0.855647595 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.053107e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.154324e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.154324e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.198151e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.269622e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.269622e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.550675 sec +TOTAL : 4.914840 sec INFO: No Floating Point Exceptions have been reported - 12,430,557,442 cycles # 3.499 GHz - 38,261,252,207 instructions # 3.08 insn per cycle - 3.553342568 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 589) (avx2: 0) (512y: 0) (512z: 0) + 14,211,276,974 cycles # 2.889 GHz + 38,370,210,397 instructions # 2.70 insn per cycle + 4.920108721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.544926e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.039506e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.039506e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.892733e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.301573e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.301573e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.716095 sec +TOTAL : 2.288479 sec INFO: No Floating Point Exceptions have been reported - 6,010,423,272 cycles # 3.498 GHz - 15,822,165,662 instructions # 2.63 insn per cycle - 1.718690630 seconds time elapsed + 6,608,042,838 cycles # 2.882 GHz + 15,829,158,403 instructions # 2.40 insn per cycle + 2.293691008 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.247900e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.416525e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.416525e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.919042e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.023820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.023820e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 0.949440 sec +TOTAL : 1.319201 sec INFO: No Floating Point Exceptions have been reported - 3,178,973,029 cycles # 3.340 GHz - 7,575,031,563 instructions # 2.38 insn per cycle - 0.952033981 seconds time elapsed + 3,618,631,378 cycles # 2.734 GHz + 7,578,247,859 instructions # 2.09 insn per cycle + 1.324366743 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.283370e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.462893e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.462893e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.492699e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.100151e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.100151e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 0.928789 sec +TOTAL : 1.250231 sec INFO: No Floating Point Exceptions have been reported - 3,108,736,160 cycles # 3.339 GHz - 7,224,185,601 instructions # 2.32 insn per cycle - 0.931414201 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2904) (512y: 3) (512z: 0) + 3,418,366,623 cycles # 2.724 GHz + 7,152,275,486 instructions # 2.09 insn per cycle + 1.255758340 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.437994e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.673352e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.673352e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.830732e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.562097e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.562097e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 0.840102 sec +TOTAL : 1.681529 sec INFO: No Floating Point Exceptions have been reported - 2,814,712,978 cycles # 3.341 GHz - 5,761,169,749 instructions # 2.05 insn per cycle - 0.842697262 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2371) (512y: 0) (512z: 1888) + 3,218,452,038 cycles # 1.909 GHz + 5,786,270,960 instructions # 1.80 insn per cycle + 1.686847993 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index cc474d6ce9..9b5852a8c1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -1,128 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:16:08 +DATE: 2024-05-16_15:09:35 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.497286e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.653761e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.976765e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.520499 sec +INFO: No Floating Point Exceptions have been reported + 2,122,949,824 cycles # 2.819 GHz + 3,308,605,661 instructions # 1.56 insn per cycle + 0.811337951 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe: Aborted - 4,114,159 cycles # 3.246 GHz - 6,325,343 instructions # 1.54 insn per cycle - 0.038529387 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 589) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.187282e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.258952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.258952e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.881435 sec +INFO: No Floating Point Exceptions have been reported + 13,993,887,356 cycles # 2.864 GHz + 38,340,879,445 instructions # 2.74 insn per cycle + 4.886765699 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 
--curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe: Aborted - 4,007,195 cycles # 3.247 GHz - 6,305,244 instructions # 1.57 insn per cycle - 0.037795572 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.866184e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.266559e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.266559e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.243695 sec +INFO: No Floating Point Exceptions have been reported + 6,437,628,216 cycles # 2.863 GHz + 15,815,570,005 instructions # 2.46 insn per cycle + 2.248941783 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe: Aborted - 3,978,139 cycles # 3.240 GHz - 6,305,133 instructions # 1.58 insn per cycle - 0.038545257 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.699018e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.949673e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
9.949673e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.293092 sec +INFO: No Floating Point Exceptions have been reported + 3,447,035,685 cycles # 2.657 GHz + 7,594,377,345 instructions # 2.20 insn per cycle + 1.298317015 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe: Aborted - 4,051,552 cycles # 3.250 GHz - 6,325,721 instructions # 1.56 insn per cycle - 0.037990069 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 2904) (512y: 3) (512z: 0) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.410196e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.089229e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089229e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.202332 sec +INFO: No Floating Point Exceptions have been reported + 3,248,094,322 cycles # 2.691 GHz + 7,201,883,054 instructions # 2.22 insn per cycle + 1.207739630 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe: Aborted - 4,166,692 cycles # 2.635 GHz - 6,311,223 instructions # 1.51 insn per cycle - 0.038878370 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2371) (512y: 0) (512z: 1888) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.682713e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 7.392370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.392370e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.657517 sec +INFO: No Floating Point Exceptions have been reported + 3,060,341,406 cycles # 1.842 GHz + 5,836,262,166 instructions # 1.91 insn per cycle + 1.662864711 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 2598a17a4b..7e3b1fa48e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,183 +1,216 @@ -Building in 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:15:25 +DATE: 2024-05-16_15:06:52 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.502594e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.623050e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.943883e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.628666 sec +INFO: No Floating Point Exceptions have been reported + 2,403,264,425 cycles # 2.820 GHz + 3,734,811,294 instructions # 1.55 insn per cycle + 0.909767197 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.055035e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.156447e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.156447e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.547705 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.185686e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.257300e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.257300e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.884819 sec INFO: No Floating Point Exceptions have been reported - 12,420,939,646 cycles # 3.499 GHz - 38,261,315,045 instructions # 3.08 insn per cycle - 3.550403868 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 589) (avx2: 0) (512y: 0) (512z: 0) + 13,995,449,913 cycles # 2.863 GHz + 38,340,978,131 instructions # 2.74 insn per cycle + 4.889991891 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199022179469 Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.543960e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.036828e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.036828e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.716108 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.864053e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.263128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.263128e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.244540 sec INFO: No Floating Point Exceptions have been reported - 6,012,121,369 cycles # 3.498 GHz - 15,822,170,874 instructions # 2.63 insn per cycle - 1.718814295 seconds time elapsed + 6,436,419,349 cycles # 2.862 GHz + 15,815,556,279 instructions # 2.46 insn per cycle + 2.249779623 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.245919e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.415937e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.415937e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 0.951520 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.799961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.008748e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.008748e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.279634 sec INFO: No Floating Point Exceptions have been reported - 3,185,388,754 cycles # 3.340 GHz - 7,574,845,638 instructions # 2.38 insn per cycle - 0.954103825 seconds time elapsed + 3,447,592,643 cycles # 2.685 GHz + 7,593,708,789 instructions # 2.20 insn per cycle + 1.284877623 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.288057e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.468111e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.468111e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 0.925986 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.434984e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092289e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092289e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.198937 sec INFO: No Floating Point Exceptions have been reported - 3,101,678,421 cycles # 3.341 GHz - 7,222,956,816 instructions # 2.33 insn per cycle - 0.928709358 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2904) (512y: 3) (512z: 0) + 3,242,375,801 cycles # 2.694 GHz + 7,202,509,960 instructions # 2.22 insn per cycle + 1.204245270 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181684445590 Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.438320e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.674891e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.674891e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 0.839988 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.713311e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.432943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.432943e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.650810 sec INFO: No Floating Point Exceptions have been reported - 2,816,211,672 cycles # 3.343 GHz - 5,760,731,099 instructions # 2.05 insn per cycle - 0.842654266 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2371) (512y: 0) (512z: 1888) + 3,050,285,995 cycles # 1.842 GHz + 5,834,789,164 instructions # 1.91 insn per cycle + 1.656446986 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183349184692 Relative difference = 1.6508058850146622e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index abb5b0cb23..3e123e6fd7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:56:27 +DATE: 2024-05-16_14:36:11 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.096553e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.763289e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.037690e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.480283 sec +INFO: No Floating Point Exceptions have been reported + 2,036,711,218 cycles # 2.852 GHz + 2,918,453,967 instructions # 1.43 insn per cycle + 0.771336406 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.116597e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.221945e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.221945e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.480996 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.166079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.236793e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.236793e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.926463 sec INFO: No Floating Point Exceptions have been reported - 12,173,964,605 cycles # 3.496 GHz - 39,802,438,725 instructions # 3.27 insn per cycle - 3.483478182 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 580) (avx2: 0) (512y: 0) (512z: 0) + 14,320,299,267 cycles # 2.905 GHz + 39,836,243,439 instructions # 2.78 insn per cycle + 4.931482509 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199028000236 Relative difference = 4.790961076489297e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.116195e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.907393e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.907393e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.406100 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.723514e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.285593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.285593e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 1.919156 sec INFO: No Floating Point Exceptions have been reported - 4,925,094,494 cycles # 3.497 GHz - 15,291,998,664 instructions # 3.10 insn per cycle - 1.408797942 seconds time elapsed + 5,582,245,803 cycles # 2.902 GHz + 15,285,424,302 instructions # 2.74 insn per cycle + 1.924109376 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2474) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193548331037 Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.102214e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.978269e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.978269e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.261979 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.349024e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.991002e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.991002e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.737963 sec INFO: No Floating Point Exceptions have been reported - 4,207,491,304 cycles # 3.328 GHz - 9,715,468,356 instructions # 2.31 insn per cycle - 1.264618453 seconds time elapsed + 4,749,494,972 cycles # 2.726 GHz + 9,735,095,064 instructions # 2.05 insn per cycle + 1.742978161 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3708) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182108197361 Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.226890e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.012042e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.012042e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.247778 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.536931e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.219273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.219273e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.690263 sec INFO: No Floating Point Exceptions have been reported - 4,163,957,666 cycles # 3.331 GHz - 9,272,726,706 instructions # 2.23 insn per cycle - 1.250391290 seconds time elapsed + 4,623,322,631 cycles # 2.728 GHz + 9,325,575,279 instructions # 2.02 insn per cycle + 1.695318457 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3496) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182108197361 Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 
2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.175307e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.329979e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329979e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.002712 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.572579e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.052133e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.052133e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.968034 sec INFO: No Floating Point Exceptions have been reported - 3,349,707,803 cycles # 3.333 GHz - 6,969,691,373 instructions # 2.08 insn per cycle - 1.005329902 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2611) (512y: 0) (512z: 2220) + 3,660,831,684 cycles # 1.856 GHz + 7,034,974,988 instructions # 1.92 insn per cycle + 1.973212700 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2610) (512y: 12) (512z: 2220) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183459779248 Relative difference = 1.7053177021099307e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index a2a4f8bd4a..c7eded0fc2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:07:51 +DATE: 2024-05-16_14:52:42 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.456356e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.657836e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.983561e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.487671 sec +INFO: No Floating Point Exceptions have been reported + 2,030,099,363 cycles # 2.844 GHz + 2,856,891,631 instructions # 1.41 insn per cycle + 0.771313393 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.274775e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.391142e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.391142e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.317724 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.397227e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.481743e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.481743e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.460576 sec INFO: No Floating Point Exceptions have been reported - 11,614,161,745 cycles # 3.498 GHz - 34,394,408,726 instructions # 2.96 insn per cycle - 3.320490594 seconds time elapsed + 12,588,647,411 cycles # 2.819 GHz + 34,372,288,545 instructions # 2.73 insn per cycle + 4.465853868 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.102905e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.687439e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.687439e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.590451 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.225217e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.687950e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.687950e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.095644 sec INFO: No Floating Point Exceptions have been reported - 5,571,972,185 cycles # 3.498 GHz - 14,867,156,586 instructions # 2.67 insn per cycle - 1.593201332 seconds time elapsed + 6,085,238,066 cycles # 2.897 GHz + 14,860,574,019 instructions # 2.44 insn per cycle + 2.101017455 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193803280592 Relative difference = 1.8746278463897685e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.246464e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.014393e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.014393e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.243841 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.969640e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.750011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.750011e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.592133 sec INFO: No Floating Point Exceptions have been reported - 4,150,312,815 cycles # 3.330 GHz - 9,009,830,893 instructions # 2.17 insn per cycle - 1.246594471 seconds time elapsed + 4,316,607,801 cycles # 2.703 GHz + 9,028,975,402 instructions # 2.09 insn per cycle + 1.597664902 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4443) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181999931112 Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.750139e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.075227e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.075227e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.187335 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.187100e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.023996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.023996e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.545422 sec INFO: No Floating Point Exceptions have been reported - 3,964,482,809 cycles # 3.332 GHz - 8,611,789,877 instructions # 2.17 insn per cycle - 1.190075774 seconds time elapsed + 4,204,195,380 cycles # 2.712 GHz + 8,663,569,400 instructions # 2.06 insn per cycle + 1.550927334 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4243) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181999931112 Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 
2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.076286e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204393e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.204393e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.085926 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.251438e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.680453e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.680453e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.083936 sec INFO: No Floating Point Exceptions have been reported - 3,624,515,529 cycles # 3.331 GHz - 7,753,366,929 instructions # 2.14 insn per cycle - 1.088713881 seconds time elapsed + 3,833,998,104 cycles # 1.836 GHz + 7,808,361,622 instructions # 2.04 insn per cycle + 2.089489123 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4424) (512y: 0) (512z: 2555) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183246739209 Relative difference = 1.6003107281264138e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index c17343ca4e..aad34f68a4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:08:00 +DATE: 2024-05-16_14:53:02 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.520611e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.721194e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.056652e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.485221 sec +INFO: No Floating Point Exceptions have been reported + 2,023,639,378 cycles # 2.841 GHz + 2,891,046,466 instructions # 1.43 insn per cycle + 0.769493206 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.588851e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.729160e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.729160e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.036980 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.614708e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.719370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.719370e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.097789 sec INFO: No Floating Point Exceptions have been reported - 10,627,447,348 cycles # 3.497 GHz - 35,130,770,567 instructions # 3.31 insn per cycle - 3.039654652 seconds time elapsed + 11,755,034,517 cycles # 2.866 GHz + 35,108,588,793 instructions # 2.99 insn per cycle + 4.103114971 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.183811e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.782224e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.782224e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.574332 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.332294e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.809853e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.809853e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.053683 sec INFO: No Floating Point Exceptions have been reported - 5,512,574,555 cycles # 3.496 GHz - 14,476,901,478 instructions # 2.63 insn per cycle - 1.577023405 seconds time elapsed + 5,951,415,517 cycles # 2.891 GHz + 14,470,123,335 instructions # 2.43 insn per cycle + 2.059025817 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193583255634 Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.001680e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.107747e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.107747e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.156263 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.326940e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.191185e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.191185e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.518155 sec INFO: No Floating Point Exceptions have been reported - 3,862,044,679 cycles # 3.333 GHz - 8,855,760,137 instructions # 2.29 insn per cycle - 1.159048658 seconds time elapsed + 4,152,217,913 cycles # 2.727 GHz + 8,874,854,960 instructions # 2.14 insn per cycle + 1.523530355 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3574) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182107033208 Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.511281e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.045917e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.045917e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.213783 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.326335e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.192412e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.192412e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.518142 sec INFO: No Floating Point Exceptions have been reported - 4,051,985,329 cycles # 3.332 GHz - 8,359,035,113 instructions # 2.06 insn per cycle - 1.216495377 seconds time elapsed + 4,138,145,120 cycles # 2.717 GHz + 8,411,511,000 instructions # 2.03 insn per cycle + 1.523559219 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3319) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182107033208 Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 
2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.090474e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.221042e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.221042e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.073137 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.337364e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.777859e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.777859e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.053123 sec INFO: No Floating Point Exceptions have been reported - 3,583,841,042 cycles # 3.332 GHz - 7,646,282,047 instructions # 2.13 insn per cycle - 1.075899915 seconds time elapsed + 3,784,038,038 cycles # 1.840 GHz + 7,702,433,783 instructions # 2.04 insn per cycle + 2.058532499 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3440) (512y: 0) (512z: 2107) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183204829693 Relative difference = 1.5796536184903122e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 6068583176..ff88d5da2d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:56:37 +DATE: 2024-05-16_14:36:32 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.198792e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.180605e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275668e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.521467 sec +INFO: No Floating Point Exceptions have been reported + 2,143,649,339 cycles # 2.843 GHz + 3,098,162,725 instructions # 1.45 insn per cycle + 0.810608393 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.746015e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.818179e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.818179e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.946181 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.033714e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.092456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.092456e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.262850 sec INFO: No Floating Point Exceptions have been reported - 13,806,209,783 cycles # 3.496 GHz - 38,515,589,460 instructions # 2.79 insn per cycle - 3.948957492 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 678) (avx2: 0) (512y: 0) (512z: 0) + 15,278,986,093 cycles # 2.901 GHz + 38,575,389,182 instructions # 2.52 insn per cycle + 5.268064562 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.896530e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.148454e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.148454e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.267633 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.527314e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.723139e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.723139e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.080390 sec INFO: No Floating Point Exceptions have been reported - 7,935,743,710 cycles # 3.496 GHz - 24,216,719,982 instructions # 3.05 insn per cycle - 2.270534850 seconds time elapsed + 8,961,614,258 cycles # 2.906 GHz + 24,226,315,758 instructions # 2.70 insn per cycle + 3.085434765 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.984920e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.657115e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.657115e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.441248 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.613394e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.100134e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.100134e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.976346 sec INFO: No Floating Point Exceptions have been reported - 4,807,387,010 cycles # 3.330 GHz - 11,202,723,427 instructions # 2.33 insn per cycle - 1.444162110 seconds time elapsed + 5,394,338,439 cycles # 2.724 GHz + 11,277,527,499 instructions # 2.09 insn per cycle + 1.981499886 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.277957e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.991395e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.991395e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.389888 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.276948e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.897611e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.897611e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.778784 sec INFO: No Floating Point Exceptions have been reported - 4,638,113,998 cycles # 3.331 GHz - 10,547,744,593 instructions # 2.27 insn per cycle - 1.392738428 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2218) (512y: 128) (512z: 0) + 4,855,499,941 cycles # 2.723 GHz + 10,526,571,188 instructions # 2.17 insn per cycle + 1.784170390 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.526422e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.287794e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.287794e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.353338 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.815864e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.036087e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.036087e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.856419 sec INFO: No Floating Point Exceptions have been reported - 4,510,300,771 cycles # 3.326 GHz - 7,441,315,374 instructions # 1.65 insn per cycle - 1.356176926 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1626) (512y: 104) (512z: 1606) + 5,199,981,370 cycles # 1.818 GHz + 7,603,665,117 instructions # 1.46 insn per cycle + 2.861804972 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 0a2e8b6ea9..1d76304278 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:56:49 +DATE: 2024-05-16_14:36:55 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.208651e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.184994e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280716e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.522551 sec +INFO: No Floating Point Exceptions have been reported + 2,145,230,616 cycles # 2.840 GHz + 3,093,123,772 instructions # 1.44 insn per cycle + 0.812278354 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.687332e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.756728e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.756728e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.029679 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.021911e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.079930e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.079930e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.294031 sec INFO: No Floating Point Exceptions have been reported - 14,101,428,195 cycles # 3.497 GHz - 40,344,626,403 instructions # 2.86 insn per cycle - 4.032520152 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) + 15,341,153,400 cycles # 2.896 GHz + 40,370,282,827 instructions # 2.63 insn per cycle + 5.299425936 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.095887e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.369304e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.369304e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.183673 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.710012e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.926494e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.926494e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.935201 sec INFO: No Floating Point Exceptions have been reported - 7,645,788,462 cycles # 3.497 GHz - 23,246,893,330 instructions # 3.04 insn per cycle - 2.186554544 seconds time elapsed + 8,515,314,447 cycles # 2.897 GHz + 23,253,613,819 instructions # 2.73 insn per cycle + 2.940392108 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.825873e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.312613e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.312613e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.663253 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.780066e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.132607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.132607e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.302191 sec INFO: No Floating Point Exceptions have been reported - 5,540,656,190 cycles # 3.326 GHz - 12,888,464,114 instructions # 2.33 insn per cycle - 1.666087029 seconds time elapsed + 6,262,262,467 cycles # 2.715 GHz + 12,962,490,062 instructions # 2.07 insn per cycle + 2.307689771 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.204420e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.743180e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.743180e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.579391 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.109643e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.511847e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.511847e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.160493 sec INFO: No Floating Point Exceptions have been reported - 5,263,221,323 cycles # 3.327 GHz - 12,084,809,163 instructions # 2.30 insn per cycle - 1.582218339 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2210) (512y: 296) (512z: 0) + 5,903,466,716 cycles # 2.727 GHz + 12,238,680,442 instructions # 2.07 insn per cycle + 2.165768560 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.184283e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.882775e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.882775e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.404252 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.507940e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.694154e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.694154e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.097205 sec INFO: No Floating Point Exceptions have been reported - 4,678,120,778 cycles # 3.325 GHz - 8,673,946,741 instructions # 1.85 insn per cycle - 1.407067408 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1501) (512y: 173) (512z: 1908) + 5,614,268,818 cycles # 1.810 GHz + 8,744,074,840 instructions # 1.56 insn per cycle + 3.102417520 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index b67eec5b5f..1d7490861d 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:57:01 +DATE: 2024-05-16_14:37:19 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.992211e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.047041e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.061161e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.469322 sec +INFO: No Floating Point Exceptions have been reported + 1,970,950,644 cycles # 2.853 GHz + 2,836,233,202 instructions # 1.44 insn per cycle + 0.747868437 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.129686e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.329949e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.341716e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.607217 sec +INFO: No Floating Point Exceptions have been reported + 2,397,125,482 cycles # 2.825 GHz + 3,658,262,516 instructions # 1.53 insn per cycle + 0.909559944 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.217492e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.232488e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.232488e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 
2.671575e+02 ) GeV^-2 -TOTAL : 5.118007 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.379379e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.391311e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391311e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.910347 sec INFO: No Floating Point Exceptions have been reported - 17,903,778,105 cycles # 3.497 GHz - 59,484,835,491 instructions # 3.32 insn per cycle - 5.119663538 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1439) (avx2: 0) (512y: 0) (512z: 0) + 19,789,020,586 cycles # 2.863 GHz + 59,609,829,111 instructions # 3.01 insn per cycle + 6.914699001 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.227259e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.282790e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.282790e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.655687 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.619966e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.665049e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.665049e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.569460 sec INFO: No Floating Point Exceptions have been reported - 9,284,800,953 cycles # 3.494 GHz - 30,662,235,138 instructions # 3.30 insn per cycle - 2.657382930 seconds time elapsed + 10,374,266,250 cycles # 2.904 GHz + 30,674,256,165 instructions # 2.96 insn per cycle + 3.573646642 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.305459e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.328152e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.328152e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.279118 sec +OMP threads 
/ `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.120184e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.293257e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.293257e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.820051 sec INFO: No Floating Point Exceptions have been reported - 4,231,801,732 cycles # 3.305 GHz - 11,003,256,880 instructions # 2.60 insn per cycle - 1.280874454 seconds time elapsed + 4,901,380,147 cycles # 2.688 GHz + 11,019,047,598 instructions # 2.25 insn per cycle + 1.824311195 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.381557e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.407014e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.407014e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.210453 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.028182e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.049956e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.049956e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.616748 sec INFO: No Floating Point Exceptions have been reported - 4,005,080,732 cycles # 3.305 GHz - 10,337,496,214 instructions # 2.58 insn per cycle - 1.212133030 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4220) (512y: 57) (512z: 0) + 4,378,615,331 cycles # 2.702 GHz + 10,296,117,856 instructions # 2.35 insn per cycle + 1.621129053 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.914830e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.964196e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.964196e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.880242 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.954224e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.056280e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.056280e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.381742 sec INFO: No Floating Point Exceptions have been reported - 2,911,648,623 cycles # 3.303 GHz - 5,792,517,541 instructions # 1.99 insn per cycle - 0.881891759 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1544) (512y: 63) (512z: 3463) + 4,108,596,097 cycles # 1.723 GHz + 5,842,404,115 instructions # 1.42 insn per cycle + 2.385936782 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 83df88d84d..45a1ef164b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,188 +1,246 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:13:03 +DATE: 2024-05-16_15:01:40 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.535443e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.780857e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.780857e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.503670 sec +INFO: No Floating Point Exceptions have been reported + 2,012,376,201 cycles # 2.812 GHz + 3,006,218,540 instructions # 1.49 insn per cycle + 0.774572160 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.606024e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.624765e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.624765e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.841754 sec +INFO: No Floating Point Exceptions have been reported + 3,099,668,806 cycles # 2.832 GHz + 4,993,276,525 instructions # 1.61 insn per cycle + 1.155254157 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.220075e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.235259e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.235259e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.118042 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.380068e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.392068e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.392068e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.915910 sec INFO: No Floating Point Exceptions have been reported - 17,911,933,736 cycles # 3.499 GHz - 59,489,757,567 instructions # 3.32 insn per cycle - 5.119926420 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1439) (avx2: 0) (512y: 0) (512z: 0) + 19,806,579,322 cycles # 2.863 GHz + 59,611,012,266 instructions # 3.01 insn per cycle + 6.920308116 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.224279e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.279799e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.279799e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.661489 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.550339e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.594733e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.594733e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.631431 sec INFO: No Floating Point Exceptions have been reported - 9,312,150,160 cycles # 3.497 GHz - 30,709,998,487 instructions # 3.30 insn per cycle - 2.663374930 seconds time elapsed + 10,404,134,292 cycles # 2.862 GHz + 30,722,305,980 instructions # 2.95 insn per cycle + 3.635916319 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.303785e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.326328e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326328e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.285845 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.991824e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.166141e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.166141e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.853778 sec INFO: No Floating Point Exceptions have been reported - 4,255,534,359 cycles # 3.305 GHz - 11,054,510,411 instructions # 2.60 insn per cycle - 1.287820536 seconds time elapsed + 4,943,570,309 cycles # 2.661 GHz + 11,067,752,215 instructions # 2.24 insn per cycle + 1.858370590 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.379267e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.404698e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.404698e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.217248 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.005140e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.026682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.026682e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.662867 sec INFO: No Floating Point Exceptions have been reported - 4,029,105,842 cycles # 3.306 GHz - 10,386,401,263 instructions # 2.58 insn per cycle - 1.219158432 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4220) (512y: 57) (512z: 0) + 4,426,260,539 cycles # 2.656 GHz + 10,346,882,831 instructions # 2.34 insn per cycle + 1.667431238 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.912058e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.960855e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.960855e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.886228 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.832038e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.932754e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.932754e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.431611 sec INFO: No Floating Point Exceptions have been reported - 2,935,578,198 cycles # 3.307 GHz - 5,830,336,340 instructions # 1.99 insn per cycle - 0.888122471 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1544) (512y: 63) (512z: 3463) + 4,145,808,516 cycles # 1.702 GHz + 5,880,428,508 instructions # 1.42 insn per cycle + 2.436095886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 54930f8d73..c8d4c1d012 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:57:14 +DATE: 2024-05-16_14:37:45 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.984938e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.044546e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056865e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.468609 sec +INFO: No Floating Point Exceptions have been reported + 1,981,002,182 cycles # 2.846 GHz + 2,842,945,772 instructions # 1.44 insn per cycle + 0.752497111 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.119070e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.315352e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326681e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.601408 sec +INFO: No Floating Point Exceptions have been reported + 2,383,936,937 cycles # 2.851 GHz + 3,651,729,049 instructions # 1.53 insn per cycle + 0.896728355 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.260516e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.276117e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.276117e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 
2.671575e+02 ) GeV^-2 -TOTAL : 5.051336 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.454763e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.467389e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.467389e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.698212 sec INFO: No Floating Point Exceptions have been reported - 17,654,492,955 cycles # 3.494 GHz - 58,797,490,071 instructions # 3.33 insn per cycle - 5.053066480 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1323) (avx2: 0) (512y: 0) (512z: 0) + 19,500,935,732 cycles # 2.911 GHz + 58,799,003,967 instructions # 3.02 insn per cycle + 6.702449206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.250080e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.306668e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.306668e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.646454 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.669930e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.715854e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.715854e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.531511 sec INFO: No Floating Point Exceptions have been reported - 9,254,451,969 cycles # 3.495 GHz - 30,333,424,248 instructions # 3.28 insn per cycle - 2.648175602 seconds time elapsed + 10,228,095,464 cycles # 2.894 GHz + 30,347,180,891 instructions # 2.97 insn per cycle + 3.535798492 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.250886e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.271590e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.271590e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.333796 sec +OMP threads 
/ `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.789972e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.950829e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.950829e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.887432 sec INFO: No Floating Point Exceptions have been reported - 4,412,203,557 cycles # 3.304 GHz - 11,467,908,522 instructions # 2.60 insn per cycle - 1.335544581 seconds time elapsed + 5,055,118,079 cycles # 2.674 GHz + 11,484,444,983 instructions # 2.27 insn per cycle + 1.891612421 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.318979e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.342183e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.342183e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.266599 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.667837e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.860484e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.860484e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.718788 sec INFO: No Floating Point Exceptions have been reported - 4,190,277,758 cycles # 3.305 GHz - 10,842,214,694 instructions # 2.59 insn per cycle - 1.268323708 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4215) (512y: 230) (512z: 0) + 4,655,858,880 cycles # 2.704 GHz + 10,842,096,596 instructions # 2.33 insn per cycle + 1.722993406 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.895707e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.943459e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.943459e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.888731 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.981237e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.082937e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.082937e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.372571 sec INFO: No Floating Point Exceptions have been reported - 2,941,343,592 cycles # 3.304 GHz - 6,060,826,924 instructions # 2.06 insn per cycle - 0.890391590 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1464) (512y: 118) (512z: 3566) + 4,129,142,877 cycles # 1.738 GHz + 6,106,185,085 instructions # 1.48 insn per cycle + 2.376879303 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 50861b85cf..e4bc7cf2cc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:57:27 +DATE: 2024-05-16_14:38:10 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.514552e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.271085e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.366020e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 +TOTAL : 0.450662 sec +INFO: No Floating Point Exceptions have been reported + 1,888,418,045 cycles # 2.834 GHz + 2,686,004,303 instructions # 1.42 insn per cycle + 0.722549365 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.424662e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.459806e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.527254e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 +TOTAL : 0.495261 sec +INFO: No Floating Point Exceptions have been reported + 2,099,817,827 cycles # 2.862 GHz + 2,990,738,948 instructions # 1.42 insn per cycle + 0.790419941 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412608e+00 +Avg ME (F77/GPU) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.303784e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.320597e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.320597e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 
2.665343e+02 ) GeV^-2 -TOTAL : 4.984157 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.505220e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.518346e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.518346e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.562288 sec INFO: No Floating Point Exceptions have been reported - 17,430,828,591 cycles # 3.497 GHz - 58,906,467,913 instructions # 3.38 insn per cycle - 4.985748040 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) + 19,080,957,547 cycles # 2.906 GHz + 58,959,648,789 instructions # 3.09 insn per cycle + 6.566573323 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412986e+00 Avg ME (F77/C++) = 1.4129858051842916 Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.100286e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.119257e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.119257e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724758e+02 +- 2.665339e+02 ) GeV^-2 -TOTAL : 1.512107 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.204155e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.352745e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.352745e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.018056 sec INFO: No Floating Point Exceptions have been reported - 5,290,980,353 cycles # 3.496 GHz - 16,686,581,131 instructions # 3.15 insn per cycle - 1.513784932 seconds time elapsed + 5,861,245,947 cycles # 2.899 GHz + 16,693,370,121 instructions # 2.85 insn per cycle + 2.022246601 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412987e+00 Avg ME (F77/C++) = 1.4129865669244737 Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.454877e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.549512e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.549512e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 -TOTAL : 0.690038 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.747206e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.811751e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.811751e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.959718 sec INFO: No Floating Point Exceptions have been reported - 2,286,098,444 cycles # 3.307 GHz - 5,966,861,474 instructions # 2.61 insn per cycle - 0.691674223 seconds time elapsed + 2,597,973,759 cycles # 2.697 GHz + 5,979,816,432 instructions # 2.30 insn per cycle + 0.963957244 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.609633e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.709894e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.709894e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 -TOTAL : 0.651107 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.928786e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.008064e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.008064e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.871454 sec INFO: No Floating Point Exceptions have been reported - 2,157,337,429 cycles # 3.307 GHz - 5,614,584,835 instructions # 2.60 insn per cycle - 0.652720509 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4726) (512y: 2) (512z: 0) + 2,346,801,151 cycles # 2.682 GHz + 5,601,970,539 instructions # 2.39 insn per cycle + 0.875813732 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads 
/ `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.927137e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.160943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.160943e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 -TOTAL : 0.440370 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.412327e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.455439e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.455439e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.184240 sec INFO: No Floating Point Exceptions have been reported - 1,459,851,828 cycles # 3.305 GHz - 3,301,929,201 instructions # 2.26 insn per cycle - 0.441970363 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2157) (512y: 7) (512z: 3674) + 2,059,493,323 cycles # 1.734 GHz + 3,333,364,881 instructions # 1.62 insn per cycle + 1.188531798 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 5daaab0eb2..d735dc5897 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,188 +1,246 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:13:15 +DATE: 2024-05-16_15:02:06 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.750186e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.085490e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.085490e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 +TOTAL : 0.469338 sec +INFO: No Floating Point Exceptions have been reported + 1,918,362,944 cycles # 2.804 GHz + 2,834,169,916 instructions # 1.48 insn per cycle + 0.742178075 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.524122e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.570005e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.570005e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 +TOTAL : 0.651816 sec +INFO: No Floating Point Exceptions have been reported + 2,503,160,784 cycles # 2.822 GHz + 3,832,792,162 instructions # 1.53 insn per cycle + 0.943470239 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412608e+00 +Avg ME (F77/GPU) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.307510e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.324633e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.324633e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 4.980490 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.465694e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.479110e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.479110e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.671662 sec INFO: No Floating Point Exceptions have been reported - 17,424,453,776 cycles # 3.497 GHz - 58,911,014,988 instructions # 3.38 insn per cycle - 4.982343398 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) + 19,108,337,453 cycles # 2.863 GHz + 58,967,331,894 instructions # 3.09 insn per cycle + 6.675976597 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412986e+00 Avg ME (F77/C++) = 1.4129858051842916 Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.092074e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.110752e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.110752e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724758e+02 +- 2.665339e+02 ) GeV^-2 -TOTAL : 1.526769 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.093089e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.238027e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.238027e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.051178 sec INFO: No Floating Point Exceptions have been reported - 5,340,998,015 cycles # 3.495 GHz - 16,734,817,199 instructions # 3.13 insn per cycle - 1.528515210 seconds time elapsed + 5,880,119,320 cycles # 2.862 GHz + 16,741,679,626 instructions # 2.85 insn per cycle + 2.055508197 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412987e+00 Avg ME (F77/C++) = 1.4129865669244737 Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.451134e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.545641e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.545641e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 -TOTAL : 0.693955 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.718905e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.782305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.782305e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.980043 sec INFO: No Floating Point Exceptions have been reported - 2,299,980,015 cycles # 3.307 GHz - 6,004,079,961 instructions # 2.61 insn per cycle - 0.695744167 seconds time elapsed + 2,616,418,693 cycles # 2.660 GHz + 6,017,096,104 instructions # 2.30 insn per cycle + 0.984343134 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.603457e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.705544e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.705544e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 -TOTAL : 0.655444 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.912882e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.991175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.991175e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.883189 sec INFO: No Floating Point Exceptions have been reported - 2,172,433,478 cycles # 3.307 GHz - 5,651,609,485 instructions # 2.60 insn per cycle - 0.657170212 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4726) (512y: 2) (512z: 0) + 2,365,822,002 cycles # 2.667 GHz + 5,638,771,692 instructions # 2.38 insn per cycle + 0.887626463 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.911036e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.142668e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.142668e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 -TOTAL : 0.445137 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.399129e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.441231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.441231e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.200076 sec INFO: No Floating Point Exceptions have been reported - 1,476,582,170 cycles # 3.306 GHz - 3,343,481,743 instructions # 2.26 insn per cycle - 0.446859671 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2157) (512y: 7) (512z: 3674) + 2,081,452,605 cycles # 1.729 GHz + 3,374,965,036 instructions # 1.62 insn per cycle + 1.204429196 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 282ec8ea18..3d41e21b12 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:57:36 +DATE: 2024-05-16_14:38:31 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.548366e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.290418e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.382374e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 +TOTAL : 0.453301 sec +INFO: No Floating Point Exceptions have been reported + 1,884,361,235 cycles # 2.811 GHz + 2,662,129,036 instructions # 1.41 insn per cycle + 0.727401829 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.381856e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.386346e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.451907e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 +TOTAL : 0.498921 sec +INFO: No Floating Point Exceptions have been reported + 2,065,776,106 cycles # 2.820 GHz + 3,002,526,593 instructions # 1.45 insn per cycle + 0.789720140 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412608e+00 +Avg ME (F77/GPU) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.327802e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.344804e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.344804e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 
2.665343e+02 ) GeV^-2 -TOTAL : 4.947724 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.479714e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.492704e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.492704e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.641350 sec INFO: No Floating Point Exceptions have been reported - 17,302,620,334 cycles # 3.496 GHz - 58,674,598,268 instructions # 3.39 insn per cycle - 4.949311337 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1024) (avx2: 0) (512y: 0) (512z: 0) + 18,978,826,784 cycles # 2.861 GHz + 58,704,221,037 instructions # 3.09 insn per cycle + 6.645410970 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412986e+00 Avg ME (F77/C++) = 1.4129858051842916 Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.166408e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.187803e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.187803e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724758e+02 +- 2.665339e+02 ) GeV^-2 -TOTAL : 1.427590 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.494310e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.651898e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.651898e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.950028 sec INFO: No Floating Point Exceptions have been reported - 4,998,564,906 cycles # 3.498 GHz - 16,503,246,104 instructions # 3.30 insn per cycle - 1.429196094 seconds time elapsed + 5,589,974,968 cycles # 2.862 GHz + 16,510,304,699 instructions # 2.95 insn per cycle + 1.954264273 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5551) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412987e+00 Avg ME (F77/C++) = 1.4129865669244737 Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.136581e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.209217e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.209217e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 -TOTAL : 0.789359 sec +OMP threads / 
`nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.496639e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.543532e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.543532e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 1.116418 sec INFO: No Floating Point Exceptions have been reported - 2,614,043,577 cycles # 3.306 GHz - 6,621,157,777 instructions # 2.53 insn per cycle - 0.790993368 seconds time elapsed + 2,975,820,242 cycles # 2.657 GHz + 6,633,799,194 instructions # 2.23 insn per cycle + 1.120575232 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5568) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.275700e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.351645e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.351645e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743722e+02 +- 2.676604e+02 ) GeV^-2 -TOTAL : 0.743100 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.615016e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.669374e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.669374e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 1.036246 sec INFO: No Floating Point Exceptions have been reported - 2,460,811,549 cycles # 3.306 GHz - 6,244,546,313 instructions # 2.54 insn per cycle - 0.744691831 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5305) (512y: 11) (512z: 0) + 2,759,204,529 cycles # 2.654 GHz + 6,255,102,481 instructions # 2.27 insn per cycle + 1.040401186 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5279) (512y: 25) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133161655815059 Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads 
/ `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.470657e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.656155e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.656155e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 -TOTAL : 0.495123 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.286831e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322123e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322123e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.300128 sec INFO: No Floating Point Exceptions have been reported - 1,640,921,568 cycles # 3.305 GHz - 3,669,320,401 instructions # 2.24 insn per cycle - 0.496729732 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2402) (512y: 9) (512z: 3969) + 2,231,395,652 cycles # 1.715 GHz + 3,699,704,768 instructions # 1.66 insn per cycle + 1.304305216 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2391) (512y: 29) (512z: 3970) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index fe8c55c044..18990368c8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:57:46 +DATE: 2024-05-16_14:38:52 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.980776e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.047318e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059891e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.469526 sec +INFO: No Floating Point Exceptions have been reported + 1,950,532,568 cycles # 2.815 GHz + 2,802,706,395 instructions # 1.44 insn per cycle + 0.749158155 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.120585e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.317479e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329114e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.606344 sec +INFO: No Floating Point Exceptions have been reported + 2,403,151,636 cycles # 2.824 GHz + 3,669,339,361 instructions # 1.53 insn per cycle + 0.910110717 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.157731e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.172249e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.172249e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 
2.671575e+02 ) GeV^-2 -TOTAL : 5.214643 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.348054e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.359694e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359694e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 7.005029 sec INFO: No Floating Point Exceptions have been reported - 18,242,480,003 cycles # 3.498 GHz - 60,528,165,591 instructions # 3.32 insn per cycle - 5.216356655 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1404) (avx2: 0) (512y: 0) (512z: 0) + 20,055,951,018 cycles # 2.863 GHz + 60,536,467,053 instructions # 3.02 insn per cycle + 7.009312607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.323704e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.380967e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.380967e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.615582 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.638770e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.684822e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.684822e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.556206 sec INFO: No Floating Point Exceptions have been reported - 9,151,647,030 cycles # 3.497 GHz - 30,371,965,788 instructions # 3.32 insn per cycle - 2.617287619 seconds time elapsed + 10,186,602,629 cycles # 2.862 GHz + 30,386,009,701 instructions # 2.98 insn per cycle + 3.560429335 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5280) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.309588e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.332285e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.332285e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.275161 sec +OMP threads 
/ `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.050822e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.223334e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.223334e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.833811 sec INFO: No Floating Point Exceptions have been reported - 4,218,419,523 cycles # 3.305 GHz - 10,962,823,763 instructions # 2.60 insn per cycle - 1.276815643 seconds time elapsed + 4,877,548,863 cycles # 2.655 GHz + 10,978,535,397 instructions # 2.25 insn per cycle + 1.838126466 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4624) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.409942e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.436943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.436943e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.186720 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.034701e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.056812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056812e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.607005 sec INFO: No Floating Point Exceptions have been reported - 3,926,467,492 cycles # 3.305 GHz - 10,288,751,023 instructions # 2.62 insn per cycle - 1.188400202 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4362) (512y: 48) (512z: 0) + 4,285,859,041 cycles # 2.661 GHz + 10,248,085,853 instructions # 2.39 insn per cycle + 1.611327735 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4280) (512y: 82) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.835113e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.880105e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.880105e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.917845 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.675038e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.769490e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.769490e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.480681 sec INFO: No Floating Point Exceptions have been reported - 3,037,911,511 cycles # 3.305 GHz - 5,995,105,780 instructions # 1.97 insn per cycle - 0.919563401 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2070) (512y: 85) (512z: 3537) + 4,211,204,679 cycles # 1.695 GHz + 6,044,041,090 instructions # 1.44 insn per cycle + 2.485018889 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2066) (512y: 117) (512z: 3540) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index ea2522f723..bea6b18082 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:57:59 +DATE: 2024-05-16_14:39:18 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.940348e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.041869e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.054764e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.469154 sec +INFO: No Floating Point Exceptions have been reported + 1,946,414,728 cycles # 2.818 GHz + 2,803,423,086 instructions # 1.44 insn per cycle + 0.748059256 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.116866e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.312173e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.323463e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.604157 sec +INFO: No Floating Point Exceptions have been reported + 2,374,249,289 cycles # 2.818 GHz + 3,602,148,119 instructions # 1.52 insn per cycle + 0.902621411 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.201508e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.216429e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.216429e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 
2.671575e+02 ) GeV^-2 -TOTAL : 5.143575 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.368504e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.380280e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.380280e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.941323 sec INFO: No Floating Point Exceptions have been reported - 17,998,527,032 cycles # 3.498 GHz - 59,876,955,320 instructions # 3.33 insn per cycle - 5.145249276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1262) (avx2: 0) (512y: 0) (512z: 0) + 19,878,296,626 cycles # 2.863 GHz + 59,936,362,271 instructions # 3.02 insn per cycle + 6.945573140 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.348649e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.406269e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.406269e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.605142 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.689994e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.736297e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.736297e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.516340 sec INFO: No Floating Point Exceptions have been reported - 9,114,931,082 cycles # 3.497 GHz - 30,051,323,374 instructions # 3.30 insn per cycle - 2.606780461 seconds time elapsed + 10,077,314,757 cycles # 2.863 GHz + 30,098,117,657 instructions # 2.99 insn per cycle + 3.520635536 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5082) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.251964e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.272881e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.272881e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.332616 sec +OMP threads 
/ `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.778247e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.940877e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.940877e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.889938 sec INFO: No Floating Point Exceptions have been reported - 4,407,815,981 cycles # 3.304 GHz - 11,447,290,279 instructions # 2.60 insn per cycle - 1.334290522 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4717) (512y: 0) (512z: 0) + 5,023,754,472 cycles # 2.654 GHz + 11,483,522,538 instructions # 2.29 insn per cycle + 1.894205310 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4723) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.341500e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.365518e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.365518e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.245709 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.644687e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.842226e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.842226e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.722080 sec INFO: No Floating Point Exceptions have been reported - 4,121,210,175 cycles # 3.305 GHz - 10,792,918,022 instructions # 2.62 insn per cycle - 1.247343751 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4317) (512y: 220) (512z: 0) + 4,590,091,342 cycles # 2.660 GHz + 10,809,457,257 instructions # 2.35 insn per cycle + 1.726406566 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4285) (512y: 234) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.840437e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.885679e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.885679e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.914759 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.641517e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.735645e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.735645e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.492729 sec INFO: No Floating Point Exceptions have been reported - 3,024,372,236 cycles # 3.301 GHz - 6,223,411,017 instructions # 2.06 insn per cycle - 0.916440024 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1968) (512y: 142) (512z: 3615) + 4,229,101,372 cycles # 1.695 GHz + 6,273,394,761 instructions # 1.48 insn per cycle + 2.496999493 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1961) (512y: 163) (512z: 3617) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 2c0f8eb99e..adf6424639 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:58:12 +DATE: 2024-05-16_14:39:44 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.453895e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.477096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.479397e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.529744 sec +INFO: No Floating Point Exceptions have been reported + 2,179,317,048 cycles # 2.822 GHz + 3,403,036,461 instructions # 1.56 insn per cycle + 0.830470867 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.124157e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.151338e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.152519e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.042150 sec +INFO: No Floating Point Exceptions have been reported + 9,405,604,432 cycles # 2.853 GHz + 20,118,562,201 instructions # 2.14 insn per cycle + 3.353608047 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.489421e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.490651e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.490651e+03 ) sec^-1 -MeanMatrixElemValue = ( 
4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.595744 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.820592e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.821434e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821434e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 9.018372 sec INFO: No Floating Point Exceptions have been reported - 23,075,412,130 cycles # 3.498 GHz - 78,768,945,704 instructions # 3.41 insn per cycle - 6.597385330 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) + 25,614,013,948 cycles # 2.839 GHz + 78,938,013,495 instructions # 3.08 insn per cycle + 9.022664733 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.824949e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.829515e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.829515e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
3.406808 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.519494e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.522699e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.522699e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.669138 sec INFO: No Floating Point Exceptions have been reported - 11,657,794,705 cycles # 3.421 GHz - 39,273,323,651 instructions # 3.37 insn per cycle - 3.408459486 seconds time elapsed + 12,898,966,245 cycles # 2.761 GHz + 39,280,150,365 instructions # 3.05 insn per cycle + 4.673492352 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.090711e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092872e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092872e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.510847 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.859599e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.875346e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.875346e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.097013 sec INFO: No Floating Point Exceptions have been reported - 4,990,714,987 cycles # 3.300 GHz - 13,680,690,805 instructions # 2.74 insn per cycle - 1.512580097 seconds time elapsed + 5,574,685,577 cycles # 2.655 GHz + 13,685,856,406 instructions # 2.46 insn per cycle + 2.101249976 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.278297e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.281313e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.281313e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
1.290325 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.915800e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.935807e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.935807e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.848754 sec INFO: No Floating Point Exceptions have been reported - 4,262,825,132 cycles # 3.300 GHz - 12,349,048,684 instructions # 2.90 insn per cycle - 1.292007253 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10349) (512y: 54) (512z: 0) + 4,887,101,603 cycles # 2.639 GHz + 12,341,123,817 instructions # 2.53 insn per cycle + 1.853060894 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.025518e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033177e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.033177e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
0.817137 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.728417e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.739729e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.739729e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.446881 sec INFO: No Floating Point Exceptions have been reported - 2,698,939,507 cycles # 3.297 GHz - 6,319,480,914 instructions # 2.34 insn per cycle - 0.818836639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1825) (512y: 70) (512z: 9372) + 4,107,098,137 cycles # 1.676 GHz + 6,336,202,498 instructions # 1.54 insn per cycle + 2.451096147 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 4cd7fe1c8c..92636e2555 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,188 +1,246 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:13:33 +DATE: 2024-05-16_15:02:53 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.094987e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.434034e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.434034e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.523594 sec +INFO: No Floating Point Exceptions have been reported + 2,118,517,608 cycles # 2.813 GHz + 3,348,276,596 instructions # 1.58 insn per cycle + 0.813391390 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.622834e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.121853e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.121853e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.329713 sec +INFO: No Floating Point Exceptions have been reported + 10,291,111,145 cycles # 2.854 GHz + 21,714,903,322 instructions # 2.11 insn per cycle + 3.660758937 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.485473e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.486697e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.486697e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.608584 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.836126e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.837051e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.837051e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.946025 sec INFO: No Floating Point Exceptions have been reported - 23,128,930,918 cycles # 3.499 GHz - 78,774,341,287 instructions # 3.41 insn per cycle - 6.610423185 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) + 25,625,027,072 cycles # 2.863 GHz + 78,943,584,564 instructions # 3.08 insn per cycle + 8.950491990 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.828492e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.833038e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.833038e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.407449 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.512313e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.515690e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.515690e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.682891 sec INFO: No Floating Point Exceptions have been reported - 11,665,541,287 cycles # 3.423 GHz - 39,286,544,282 instructions # 3.37 insn per cycle - 3.409284195 seconds time elapsed + 12,903,818,271 cycles # 2.754 GHz + 39,293,324,950 instructions # 3.05 insn per cycle + 4.687529036 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.132223e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.134549e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.134549e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.458741 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.867831e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.884189e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.884189e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.098804 sec INFO: No Floating Point Exceptions have been reported - 4,819,793,264 cycles # 3.300 GHz - 13,690,800,219 instructions # 2.84 insn per cycle - 1.460705015 seconds time elapsed + 5,587,651,201 cycles # 2.658 GHz + 13,696,262,775 instructions # 2.45 insn per cycle + 2.103410758 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.277581e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.280637e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.280637e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.293724 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.952196e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.973818e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.973818e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.845912 sec INFO: No Floating Point Exceptions have been reported - 4,274,915,564 cycles # 3.300 GHz - 12,358,080,501 instructions # 2.89 insn per cycle - 1.295644687 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10349) (512y: 54) (512z: 0) + 4,903,860,646 cycles # 2.651 GHz + 12,352,108,328 instructions # 2.52 insn per cycle + 1.850421022 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.025590e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033186e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.033186e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.819621 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.711524e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.723541e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.723541e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.457485 sec INFO: No Floating Point Exceptions have been reported - 2,709,492,339 cycles # 3.299 GHz - 6,329,068,444 instructions # 2.34 insn per cycle - 0.821511880 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1825) (512y: 70) (512z: 9372) + 4,130,677,154 cycles # 1.678 GHz + 6,346,127,118 instructions # 1.54 insn per cycle + 2.462055019 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 0602847f29..07bc3b6c73 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:17:10 +DATE: 2024-05-16_15:12:42 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.490501e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.518177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.520849e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.515008 sec +INFO: No Floating Point Exceptions have been reported + 2,117,861,647 cycles # 2.847 GHz + 3,355,581,223 instructions # 1.58 insn per cycle + 0.805282012 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.120060e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.152876e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154244e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 +TOTAL : 3.146209 sec +INFO: No Floating Point Exceptions have been reported + 9,794,350,225 cycles # 2.878 GHz + 20,567,996,876 instructions # 2.10 insn per cycle + 3.458179285 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.490303e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.491536e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.491536e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.854249e+03 ) 
sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.855163e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.855163e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.593708 sec +TOTAL : 8.856220 sec INFO: No Floating Point Exceptions have been reported - 23,068,308,718 cycles # 3.498 GHz - 78,768,722,727 instructions # 3.41 insn per cycle - 6.595351939 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) + 25,606,958,110 cycles # 2.890 GHz + 78,936,876,492 instructions # 3.08 insn per cycle + 8.860490718 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.830448e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.835008e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.835008e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.547585e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
3.550823e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.550823e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.402743 sec +TOTAL : 4.633866 sec INFO: No Floating Point Exceptions have been reported - 11,658,227,607 cycles # 3.425 GHz - 39,273,680,832 instructions # 3.37 insn per cycle - 3.404413023 seconds time elapsed + 12,886,616,952 cycles # 2.779 GHz + 39,279,548,039 instructions # 3.05 insn per cycle + 4.638052623 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.132716e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.135051e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.135051e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.950793e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.966539e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.966539e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.455134 sec +TOTAL : 2.073967 sec INFO: No Floating Point Exceptions have been reported - 4,806,609,884 cycles # 3.300 GHz - 13,680,092,385 instructions # 2.85 insn per cycle - 1.456788218 seconds time elapsed + 5,577,712,569 cycles # 2.685 GHz + 13,684,498,611 instructions # 2.45 insn per cycle + 2.078154877 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.278350e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.281359e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.281359e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.068596e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
9.089664e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.089664e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.290262 sec +TOTAL : 1.820316 sec INFO: No Floating Point Exceptions have been reported - 4,262,565,757 cycles # 3.300 GHz - 12,349,006,230 instructions # 2.90 insn per cycle - 1.291919117 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10349) (512y: 54) (512z: 0) + 4,894,997,970 cycles # 2.684 GHz + 12,339,079,686 instructions # 2.52 insn per cycle + 1.824557454 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.026526e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.034134e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.034134e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.817590e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.829323e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.829323e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.816626 sec +TOTAL : 2.417091 sec INFO: No Floating Point Exceptions have been reported - 2,696,368,067 cycles # 3.296 GHz - 6,319,179,543 instructions # 2.34 insn per cycle - 0.818275093 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1825) (512y: 70) (512z: 9372) + 4,131,104,953 cycles # 1.707 GHz + 6,332,486,091 instructions # 1.53 insn per cycle + 2.421265188 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index d568f8fe7a..0a65f9fefe 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -1,128 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:16:10 +DATE: 2024-05-16_15:09:55 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.458490e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.485387e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.487802e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.511372 sec +INFO: No Floating Point Exceptions have been reported + 2,125,154,213 cycles # 2.818 GHz + 3,305,948,128 instructions # 1.56 insn per cycle + 0.811831996 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.112497e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.145168e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.146541e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.091906 sec +INFO: No Floating Point Exceptions have been reported + 9,555,297,501 cycles # 2.852 GHz + 20,467,928,496 instructions # 2.14 insn per cycle + 3.408325542 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe: Aborted - 4,205,713 cycles # 3.250 GHz - 6,350,306 instructions # 1.51 insn per cycle - 0.037928460 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 
1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.835837e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.836698e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.836698e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.943298 sec +INFO: No Floating Point Exceptions have been reported + 25,616,203,937 cycles # 2.864 GHz + 78,941,981,933 instructions # 3.08 insn per cycle + 8.947377666 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= 
-/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe: Aborted - 4,091,396 cycles # 3.246 GHz - 6,357,121 instructions # 1.55 insn per cycle - 0.038128326 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.509219e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.512397e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.512397e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.682859 sec +INFO: No Floating Point Exceptions have been reported + 12,889,261,061 cycles # 2.751 GHz + 39,280,374,746 instructions # 3.05 insn per cycle + 4.687212544 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe: Aborted - 4,461,358 cycles # 3.221 GHz - 6,356,202 instructions # 1.42 insn per cycle - 0.038835441 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.873155e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.889044e+03 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 7.889044e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.092539 sec +INFO: No Floating Point Exceptions have been reported + 5,573,290,015 cycles # 2.659 GHz + 13,685,575,452 instructions # 2.46 insn per cycle + 2.096738730 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe: Aborted - 4,221,853 cycles # 3.251 GHz - 6,343,286 instructions 
# 1.50 insn per cycle - 0.039304724 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10349) (512y: 54) (512z: 0) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.869803e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.890096e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.890096e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.858817 sec +INFO: No Floating Point Exceptions have been reported + 4,890,594,740 cycles # 2.626 GHz + 12,341,872,390 instructions # 2.52 insn per cycle + 1.863321950 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe: Aborted - 4,103,834 cycles # 3.247 GHz - 6,364,919 instructions # 1.55 insn per cycle - 0.038546564 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1825) (512y: 70) (512z: 9372) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
6.721792e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.733746e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.733746e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.449544 sec +INFO: No Floating Point Exceptions have been reported + 4,111,968,902 cycles # 1.676 GHz + 6,335,563,564 instructions # 1.54 insn per cycle + 2.453951471 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 894e5462b0..b300efd9c0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,183 +1,232 
@@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:15:34 +DATE: 2024-05-16_15:07:12 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.175456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.487401e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489887e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.516778 sec +INFO: No Floating Point Exceptions have been reported + 2,106,838,284 cycles # 2.817 GHz + 3,334,047,065 instructions # 1.58 insn per cycle + 0.806903831 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.725415e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.181222e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.182613e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.208931 sec +INFO: No Floating Point Exceptions have been reported + 9,884,616,856 cycles # 2.852 GHz + 22,569,706,597 instructions # 2.28 insn per cycle + 3.521271497 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.490893e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.492126e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.492126e+03 ) sec^-1 
-MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.591741 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.838805e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.839662e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.839662e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.928794 sec INFO: No Floating Point Exceptions have been reported - 23,066,107,487 cycles # 3.499 GHz - 78,768,756,399 instructions # 3.41 insn per cycle - 6.593333180 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) + 25,578,535,475 cycles # 2.864 GHz + 78,941,438,017 instructions # 3.09 insn per cycle + 8.932959256 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.837638e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.842159e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.842159e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) 
GeV^-4 -TOTAL : 3.397502 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.490750e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.493870e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.493870e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.708141 sec INFO: No Floating Point Exceptions have been reported - 11,654,233,176 cycles # 3.429 GHz - 39,272,830,844 instructions # 3.37 insn per cycle - 3.399090650 seconds time elapsed + 12,873,433,154 cycles # 2.733 GHz + 39,280,620,994 instructions # 3.05 insn per cycle + 4.712353785 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.129984e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.132298e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.132298e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 
-TOTAL : 1.458580 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.853620e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.869024e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.869024e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.097850 sec INFO: No Floating Point Exceptions have been reported - 4,817,959,562 cycles # 3.300 GHz - 13,680,105,128 instructions # 2.84 insn per cycle - 1.460248562 seconds time elapsed + 5,573,477,429 cycles # 2.652 GHz + 13,685,909,410 instructions # 2.46 insn per cycle + 2.102047066 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.278698e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.281715e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.281715e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) 
GeV^-4 -TOTAL : 1.289991 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.970623e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.991129e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.991129e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.837793 sec INFO: No Floating Point Exceptions have been reported - 4,261,761,014 cycles # 3.300 GHz - 12,348,842,997 instructions # 2.90 insn per cycle - 1.291662517 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10349) (512y: 54) (512z: 0) + 4,885,535,539 cycles # 2.653 GHz + 12,340,762,979 instructions # 2.53 insn per cycle + 1.841998870 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.025908e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033553e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.033553e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) 
GeV^-4 -TOTAL : 0.816861 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.715803e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.727367e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.727367e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.451983 sec INFO: No Floating Point Exceptions have been reported - 2,697,378,644 cycles # 3.297 GHz - 6,319,220,581 instructions # 2.34 insn per cycle - 0.818512324 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1825) (512y: 70) (512z: 9372) + 4,110,713,398 cycles # 1.674 GHz + 6,334,867,690 instructions # 1.54 insn per cycle + 2.456147392 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 0454e77e5b..254c65fd8c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:58:27 +DATE: 2024-05-16_14:40:17 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.472040e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.495257e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497568e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.530193 sec +INFO: No Floating Point Exceptions have been reported + 2,179,825,483 cycles # 2.820 GHz + 3,416,926,116 instructions # 1.57 insn per cycle + 0.832303660 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.149957e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.177471e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.178689e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.025570 sec +INFO: No Floating Point Exceptions have been reported + 9,343,829,120 cycles # 2.851 GHz + 20,017,847,921 instructions # 2.14 insn per cycle + 3.337093329 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.503400e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.504651e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.504651e+03 ) sec^-1 -MeanMatrixElemValue = ( 
4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.559421 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.844549e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.845438e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.845438e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.900941 sec INFO: No Floating Point Exceptions have been reported - 22,949,032,197 cycles # 3.498 GHz - 78,473,114,104 instructions # 3.42 insn per cycle - 6.560988078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4246) (avx2: 0) (512y: 0) (512z: 0) + 25,492,945,375 cycles # 2.863 GHz + 78,715,017,784 instructions # 3.09 insn per cycle + 8.905151100 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4264) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.747589e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.751964e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.751964e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
3.461838 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.432714e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.435728e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.435728e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.786568 sec INFO: No Floating Point Exceptions have been reported - 11,694,264,024 cycles # 3.377 GHz - 39,190,674,892 instructions # 3.35 insn per cycle - 3.463480087 seconds time elapsed + 12,968,671,480 cycles # 2.709 GHz + 39,227,279,421 instructions # 3.02 insn per cycle + 4.790848376 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:12951) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.141112e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.143469e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.143469e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.444335 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.791500e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.806568e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.806568e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.114108 sec INFO: No Floating Point Exceptions have been reported - 4,771,097,381 cycles # 3.300 GHz - 13,779,678,730 instructions # 2.89 insn per cycle - 1.446009503 seconds time elapsed + 5,617,875,214 cycles # 2.653 GHz + 13,801,216,605 instructions # 2.46 insn per cycle + 2.118326582 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.265100e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.268090e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.268090e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
1.303635 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.808696e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.827867e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.827867e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.871093 sec INFO: No Floating Point Exceptions have been reported - 4,306,878,290 cycles # 3.300 GHz - 12,450,724,323 instructions # 2.89 insn per cycle - 1.305387210 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10290) (512y: 226) (512z: 0) + 4,977,184,975 cycles # 2.656 GHz + 12,467,160,434 instructions # 2.50 insn per cycle + 1.875328468 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.025561e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033157e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.033157e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
0.816874 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.708154e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.719459e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.719459e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.454314 sec INFO: No Floating Point Exceptions have been reported - 2,699,603,241 cycles # 3.299 GHz - 6,439,988,180 instructions # 2.39 insn per cycle - 0.818482763 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1658) (512y: 182) (512z: 9374) + 4,118,637,907 cycles # 1.676 GHz + 6,458,862,875 instructions # 1.57 insn per cycle + 2.458530246 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index dfc931b6b1..452f4e853d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:08:10 +DATE: 2024-05-16_14:53:22 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.253411e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.278108e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.280152e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.538797 sec +INFO: No Floating Point Exceptions have been reported + 2,198,780,840 cycles # 2.857 GHz + 3,392,092,682 instructions # 1.54 insn per cycle + 0.826434194 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.756018e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.782691e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783822e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.316768 sec +INFO: No Floating Point Exceptions have been reported + 10,315,360,608 cycles # 2.881 GHz + 23,624,745,879 instructions # 2.29 insn per cycle + 3.638219909 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.014138e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.014639e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.014639e+02 ) sec^-1 -MeanMatrixElemValue = ( 
4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 32.715828 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.179521e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.179964e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.179964e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 39.248920 sec INFO: No Floating Point Exceptions have been reported - 114,390,740,913 cycles # 3.496 GHz - 143,868,213,956 instructions # 1.26 insn per cycle - 32.717534500 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21197) (avx2: 0) (512y: 0) (512z: 0) + 113,511,319,041 cycles # 2.892 GHz + 144,820,446,927 instructions # 1.28 insn per cycle + 39.253177511 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21353) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140450E-004 Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.957067e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.960134e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.960134e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
4.152140 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.047626e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.050057e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.050057e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.390362 sec INFO: No Floating Point Exceptions have been reported - 14,524,143,789 cycles # 3.497 GHz - 37,569,009,758 instructions # 2.59 insn per cycle - 4.153813868 seconds time elapsed + 14,740,564,650 cycles # 2.733 GHz + 37,575,494,329 instructions # 2.55 insn per cycle + 5.394647902 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:68119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.045014e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.059776e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.059776e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
1.820488 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.230737e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.243892e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.243892e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.278344 sec INFO: No Floating Point Exceptions have been reported - 6,012,459,236 cycles # 3.300 GHz - 13,056,998,383 instructions # 2.17 insn per cycle - 1.822199241 seconds time elapsed + 6,134,003,628 cycles # 2.689 GHz + 13,061,930,844 instructions # 2.13 insn per cycle + 2.282738143 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.099365e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.101589e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.101589e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
1.499142 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.779670e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.799133e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.799133e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.877722 sec INFO: No Floating Point Exceptions have been reported - 4,951,930,645 cycles # 3.300 GHz - 11,434,500,619 instructions # 2.31 insn per cycle - 1.500826567 seconds time elapsed + 5,068,047,565 cycles # 2.694 GHz + 11,440,450,267 instructions # 2.26 insn per cycle + 1.882139324 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.981779e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.989097e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.989097e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
0.834999 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.093705e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.106755e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.106755e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.321771 sec INFO: No Floating Point Exceptions have been reported - 2,755,403,907 cycles # 3.294 GHz - 5,932,997,077 instructions # 2.15 insn per cycle - 0.836712944 seconds time elapsed + 3,974,444,581 cycles # 1.709 GHz + 5,942,873,144 instructions # 1.50 insn per cycle + 2.326156002 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 70bd8c4f55..00ea23e18d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:08:54 +DATE: 2024-05-16_14:54:29 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.259147e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.284136e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.286360e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.536782 sec +INFO: No Floating Point Exceptions have been reported + 2,193,506,190 cycles # 2.857 GHz + 3,337,314,407 instructions # 1.52 insn per cycle + 0.824492176 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.761556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.788263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.789425e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.301197 sec +INFO: No Floating Point Exceptions have been reported + 10,264,886,616 cycles # 2.886 GHz + 23,377,018,059 instructions # 2.28 insn per cycle + 3.615104997 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.978402e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.978892e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.978892e+02 ) sec^-1 -MeanMatrixElemValue = ( 
4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 32.950642 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.170908e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.171353e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.171353e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 39.330349 sec INFO: No Floating Point Exceptions have been reported - 115,209,470,120 cycles # 3.496 GHz - 143,948,414,796 instructions # 1.25 insn per cycle - 32.952388317 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20527) (avx2: 0) (512y: 0) (512z: 0) + 113,688,017,774 cycles # 2.891 GHz + 144,788,018,158 instructions # 1.27 insn per cycle + 39.334720458 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20719) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.845307e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.848117e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.848117e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
4.272369 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.974783e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.977013e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.977013e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.522774 sec INFO: No Floating Point Exceptions have been reported - 14,943,641,234 cycles # 3.497 GHz - 37,756,143,792 instructions # 2.53 insn per cycle - 4.274057848 seconds time elapsed + 15,220,566,650 cycles # 2.755 GHz + 37,763,046,074 instructions # 2.48 insn per cycle + 5.527045303 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:68447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.202487e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.218094e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.218094e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
1.789478 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.412795e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.426610e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.426610e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.222010 sec INFO: No Floating Point Exceptions have been reported - 5,910,142,625 cycles # 3.300 GHz - 12,891,193,600 instructions # 2.18 insn per cycle - 1.791231685 seconds time elapsed + 6,000,419,836 cycles # 2.696 GHz + 12,896,174,142 instructions # 2.15 insn per cycle + 2.226315650 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.087781e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.089982e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.089982e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
1.515051 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.743711e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.762861e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.762861e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.885038 sec INFO: No Floating Point Exceptions have been reported - 5,004,419,408 cycles # 3.300 GHz - 11,441,178,201 instructions # 2.29 insn per cycle - 1.516742615 seconds time elapsed + 5,086,798,971 cycles # 2.694 GHz + 11,447,968,989 instructions # 2.25 insn per cycle + 1.889284279 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.012442e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.019869e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.019869e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
0.822199 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.141072e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.153903e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.153903e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.306249 sec INFO: No Floating Point Exceptions have been reported - 2,717,663,220 cycles # 3.300 GHz - 5,886,986,357 instructions # 2.17 insn per cycle - 0.823883432 seconds time elapsed + 3,947,559,408 cycles # 1.709 GHz + 5,896,754,674 instructions # 1.49 insn per cycle + 2.310527958 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index a84c1fb528..15bbe59069 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:58:43 +DATE: 2024-05-16_14:40:50 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.326887e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.370559e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.376371e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.487782 sec +INFO: No Floating Point Exceptions have been reported + 1,984,813,926 cycles # 2.807 GHz + 2,933,686,219 instructions # 1.48 insn per cycle + 0.764328783 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.584549e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.644337e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.647136e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.720884 sec +INFO: No Floating Point Exceptions have been reported + 5,560,374,951 cycles # 2.849 GHz + 11,900,809,748 instructions # 2.14 insn per cycle + 2.008088048 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.549079e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.550339e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.550339e+03 ) sec^-1 -MeanMatrixElemValue = ( 
4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.440693 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.909633e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910547e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910547e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.596251 sec INFO: No Floating Point Exceptions have been reported - 22,534,025,954 cycles # 3.498 GHz - 78,108,281,721 instructions # 3.47 insn per cycle - 6.442208141 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3570) (avx2: 0) (512y: 0) (512z: 0) + 24,624,004,022 cycles # 2.864 GHz + 78,129,381,217 instructions # 3.17 insn per cycle + 8.600293639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.776589e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.795572e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.795572e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.683962 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.891953e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.904635e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.904635e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.387612 sec INFO: No Floating Point Exceptions have been reported - 5,703,951,901 cycles # 3.385 GHz - 20,115,692,662 instructions # 3.53 insn per cycle - 1.685559639 seconds time elapsed + 6,469,659,104 cycles # 2.706 GHz + 20,120,611,338 instructions # 3.11 insn per cycle + 2.391816623 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.257616e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.267567e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.267567e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.732747 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.562010e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.568248e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.568248e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.059004 sec INFO: No Floating Point Exceptions have been reported - 2,422,509,141 cycles # 3.300 GHz - 6,983,673,080 instructions # 2.88 insn per cycle - 0.734334245 seconds time elapsed + 2,818,181,262 cycles # 2.654 GHz + 6,988,460,270 instructions # 2.48 insn per cycle + 1.063195979 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.510971e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.523230e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.523230e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 
0.659531 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.763183e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.771185e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.771185e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.938662 sec INFO: No Floating Point Exceptions have been reported - 2,180,863,659 cycles # 3.300 GHz - 6,297,386,644 instructions # 2.89 insn per cycle - 0.661145359 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10903) (512y: 9) (512z: 0) + 2,488,393,509 cycles # 2.641 GHz + 6,295,244,635 instructions # 2.53 insn per cycle + 0.942828770 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.089937e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.122434e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.122434e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 
0.407286 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.363218e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.368048e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.368048e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.211358 sec INFO: No Floating Point Exceptions have been reported - 1,347,738,835 cycles # 3.299 GHz - 3,256,621,391 instructions # 2.42 insn per cycle - 0.408802321 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2440) (512y: 14) (512z: 9577) + 2,044,658,355 cycles # 1.683 GHz + 3,265,998,063 instructions # 1.60 insn per cycle + 1.215542758 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index b381877daf..e281ad389f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,188 +1,246 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:13:49 +DATE: 2024-05-16_15:03:26 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.615502e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.322427e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.322427e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.477115 sec +INFO: No Floating Point Exceptions have been reported + 1,936,349,619 cycles # 2.809 GHz + 2,877,179,431 instructions # 1.49 insn per cycle + 0.747561501 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.243623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.556013e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.556013e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 +TOTAL : 1.907816 sec +INFO: No Floating Point Exceptions have been reported + 6,131,638,198 cycles # 2.845 GHz + 12,981,768,605 instructions # 2.12 insn per cycle + 2.213144159 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.548514e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.549769e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.549769e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.443701 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.909165e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910120e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910120e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.600727 sec INFO: No Floating Point Exceptions have been reported - 22,537,197,399 cycles # 3.497 GHz - 78,113,205,947 instructions # 3.47 insn per cycle - 6.445374920 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3570) (avx2: 0) (512y: 0) (512z: 0) + 24,637,778,479 cycles # 2.864 GHz + 78,132,610,249 instructions # 3.17 insn per cycle + 8.604942209 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.794631e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.813778e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.813778e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.682840 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.457452e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.468775e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.468775e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.550311 sec INFO: No Floating Point Exceptions have been reported - 5,714,389,513 cycles # 3.393 GHz - 20,124,926,959 instructions # 3.52 insn per cycle - 1.684605533 seconds time elapsed + 6,935,275,139 cycles # 2.716 GHz + 20,130,100,658 instructions # 2.90 insn per cycle + 2.554710358 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.258274e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.268048e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.268048e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.734277 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.550561e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.557160e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.557160e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.068476 sec INFO: No Floating Point Exceptions have been reported - 2,428,011,896 cycles # 3.300 GHz - 6,993,718,495 instructions # 2.88 insn per cycle - 0.735944609 seconds time elapsed + 2,830,711,742 cycles # 2.640 GHz + 6,997,830,070 instructions # 2.47 insn per cycle + 1.072903816 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.510898e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.523096e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.523096e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.661358 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.772339e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.780808e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.780808e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.936387 sec INFO: No Floating Point Exceptions have been reported - 2,187,351,220 cycles # 3.300 GHz - 6,306,890,918 instructions # 2.88 insn per cycle - 0.663074680 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10903) (512y: 9) (512z: 0) + 2,497,824,247 cycles # 2.658 GHz + 6,305,168,616 instructions # 2.52 insn per cycle + 0.940674173 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.096744e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.129704e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.129704e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.408629 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.362852e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.367803e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.367803e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.214531 sec INFO: No Floating Point Exceptions have been reported - 1,352,894,692 cycles # 3.300 GHz - 3,266,957,888 instructions # 2.41 insn per cycle - 0.410277153 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2440) (512y: 14) (512z: 9577) + 2,054,265,568 cycles # 1.686 GHz + 3,276,400,100 instructions # 1.59 insn per cycle + 1.218830996 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 06c6baa998..1c3846a692 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:17:26 +DATE: 2024-05-16_15:13:15 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.362325e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.415082e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.420724e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 +TOTAL : 0.471632 sec +INFO: No Floating Point Exceptions have been reported + 1,968,514,245 cycles # 2.841 GHz + 2,902,581,432 instructions # 1.47 insn per cycle + 0.750206216 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.620229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.693332e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.696842e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 +TOTAL : 1.810763 sec +INFO: No Floating Point Exceptions have been reported + 5,856,364,996 cycles # 2.870 GHz + 12,360,478,892 instructions # 2.11 insn per cycle + 2.100167053 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.550538e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.551807e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.551807e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.921942e+03 ) 
sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.922884e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.922884e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.436920 sec +TOTAL : 8.542686 sec INFO: No Floating Point Exceptions have been reported - 22,525,491,366 cycles # 3.499 GHz - 78,108,478,651 instructions # 3.47 insn per cycle - 6.438477750 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3570) (avx2: 0) (512y: 0) (512z: 0) + 24,622,493,732 cycles # 2.881 GHz + 78,127,963,456 instructions # 3.17 insn per cycle + 8.546707601 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.781607e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.800773e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.800773e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.925135e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
6.937766e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.937766e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.683203 sec +TOTAL : 2.378023 sec INFO: No Floating Point Exceptions have been reported - 5,705,621,133 cycles # 3.387 GHz - 20,115,711,639 instructions # 3.53 insn per cycle - 1.684787201 seconds time elapsed + 6,481,719,151 cycles # 2.722 GHz + 20,120,720,773 instructions # 3.10 insn per cycle + 2.382079719 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.259703e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.269603e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.269603e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.581338e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.587855e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.587855e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.732175 sec +TOTAL : 1.046370 sec INFO: No Floating Point Exceptions have been reported - 2,420,845,321 cycles # 3.300 GHz - 6,983,845,118 instructions # 2.88 insn per cycle - 0.733740969 seconds time elapsed + 2,822,358,408 cycles # 2.688 GHz + 6,985,542,199 instructions # 2.48 insn per cycle + 1.050425346 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.511960e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.524145e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.524145e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.806198e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.814674e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.814674e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.659241 sec +TOTAL : 0.917862 sec INFO: No Floating Point Exceptions have been reported - 2,179,991,812 cycles # 3.300 GHz - 6,297,120,369 instructions # 2.89 insn per cycle - 0.660820449 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10903) (512y: 9) (512z: 0) + 2,496,110,223 cycles # 2.709 GHz + 6,293,657,033 instructions # 2.52 insn per cycle + 0.921934399 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.090242e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.123456e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.123456e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.393764e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.398765e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.398765e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.407437 sec +TOTAL : 1.186576 sec INFO: No Floating Point Exceptions have been reported - 1,346,315,275 cycles # 3.294 GHz - 3,256,520,218 instructions # 2.42 insn per cycle - 0.409011406 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2440) (512y: 14) (512z: 9577) + 2,050,577,153 cycles # 1.723 GHz + 3,264,219,053 instructions # 1.59 insn per cycle + 1.190613213 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 22ac48e822..97148e3ba7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -1,128 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:16:12 +DATE: 2024-05-16_15:10:28 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.326390e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.378340e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.384052e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.468432 sec +INFO: No Floating Point Exceptions have been reported + 1,956,153,885 cycles # 2.820 GHz + 2,925,124,547 instructions # 1.50 insn per cycle + 0.750741002 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.616852e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.690868e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.694290e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.758835 sec +INFO: No Floating Point Exceptions have been reported + 5,694,632,258 cycles # 2.846 GHz + 12,170,382,669 instructions # 2.14 insn per cycle + 2.057387110 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe: Aborted - 4,029,925 cycles # 3.247 GHz - 6,334,889 instructions # 1.57 insn per cycle - 0.038588342 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3570) (avx2: 0) (512y: 0) (512z: 0) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 
1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.910170e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.911121e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.911121e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.594641 sec +INFO: No Floating Point Exceptions have been reported + 24,610,525,016 cycles # 2.863 GHz + 78,132,278,540 instructions # 3.17 insn per cycle + 8.598723021 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= 
-/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe: Aborted - 4,252,444 cycles # 3.248 GHz - 6,370,939 instructions # 1.50 insn per cycle - 0.038168696 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.888566e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.900956e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.900956e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.388862 sec +INFO: No Floating Point Exceptions have been reported + 6,476,954,136 cycles # 2.708 GHz + 20,121,920,046 instructions # 3.11 insn per cycle + 2.393015096 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe: Aborted - 4,035,262 cycles # 3.249 GHz - 6,367,716 instructions # 1.58 insn per cycle - 0.037515296 seconds time elapsed +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.562187e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.568435e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.568435e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.057977 sec +INFO: No Floating Point Exceptions have been reported + 2,818,730,747 cycles # 2.656 GHz + 6,988,428,853 instructions # 2.48 insn per cycle + 1.062013241 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe: Aborted - 4,098,823 cycles # 3.249 GHz - 6,358,246 instructions 
# 1.55 insn per cycle - 0.038463032 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10903) (512y: 9) (512z: 0) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.767100e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.775059e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.775059e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.936311 sec +INFO: No Floating Point Exceptions have been reported + 2,487,367,369 cycles # 2.647 GHz + 6,295,352,067 instructions # 2.53 insn per cycle + 0.940344403 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe: Aborted - 4,050,326 cycles # 3.250 GHz - 6,355,795 instructions # 1.57 insn per cycle - 0.037850226 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2440) (512y: 14) (512z: 9577) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
1.354758e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.359575e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.359575e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.218960 sec +INFO: No Floating Point Exceptions have been reported + 2,057,435,423 cycles # 1.683 GHz + 3,266,628,935 instructions # 1.59 insn per cycle + 1.223151915 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index dd77f1c1e4..dc12ca7aae 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,183 +1,232 @@ 
-Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:15:50 +DATE: 2024-05-16_15:07:45 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.747793e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.405382e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.411341e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.473580 sec +INFO: No Floating Point Exceptions have been reported + 1,929,031,590 cycles # 2.811 GHz + 2,902,080,173 instructions # 1.50 insn per cycle + 0.744461149 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.464876e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.690964e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.694375e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 +TOTAL : 1.841417 sec +INFO: No Floating Point Exceptions have been reported + 5,892,322,421 cycles # 2.846 GHz + 12,206,550,799 instructions # 2.07 insn per cycle + 2.128532659 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.550847e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.552104e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.552104e+03 ) sec^-1 
-MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.435838 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.911509e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.912427e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.912427e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.588000 sec INFO: No Floating Point Exceptions have been reported - 22,523,261,605 cycles # 3.499 GHz - 78,108,255,985 instructions # 3.47 insn per cycle - 6.437369331 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3570) (avx2: 0) (512y: 0) (512z: 0) + 24,603,486,303 cycles # 2.864 GHz + 78,128,844,221 instructions # 3.18 insn per cycle + 8.592028071 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863266294753E-004 Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.783581e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.802686e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.802686e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 
-TOTAL : 1.682828 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.897521e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.909886e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.909886e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.385730 sec INFO: No Floating Point Exceptions have been reported - 5,704,990,484 cycles # 3.388 GHz - 20,115,695,758 instructions # 3.53 insn per cycle - 1.684404018 seconds time elapsed + 6,477,077,766 cycles # 2.711 GHz + 20,121,628,941 instructions # 3.11 insn per cycle + 2.389950461 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861460025036E-004 Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.258809e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.268753e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.268753e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 
-TOTAL : 0.732400 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.564279e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.570570e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.570570e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.056604 sec INFO: No Floating Point Exceptions have been reported - 2,421,755,654 cycles # 3.300 GHz - 6,983,836,255 instructions # 2.88 insn per cycle - 0.734028274 seconds time elapsed + 2,817,814,854 cycles # 2.658 GHz + 6,988,003,654 instructions # 2.48 insn per cycle + 1.060745031 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.511906e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.524099e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.524099e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) 
GeV^-4 -TOTAL : 0.659359 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.769248e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.777272e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.777272e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.935150 sec INFO: No Floating Point Exceptions have been reported - 2,180,577,486 cycles # 3.300 GHz - 6,297,266,641 instructions # 2.89 insn per cycle - 0.661018945 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10903) (512y: 9) (512z: 0) + 2,489,664,656 cycles # 2.652 GHz + 6,295,373,565 instructions # 2.53 insn per cycle + 0.939255376 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174574524E-004 Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.090536e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.123527e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.123527e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) 
GeV^-4 -TOTAL : 0.407424 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.359069e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.363945e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.363945e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.214973 sec INFO: No Floating Point Exceptions have been reported - 1,346,438,406 cycles # 3.294 GHz - 3,256,919,958 instructions # 2.42 insn per cycle - 0.409008098 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2440) (512y: 14) (512z: 9577) + 2,049,104,437 cycles # 1.682 GHz + 3,266,431,248 instructions # 1.59 insn per cycle + 1.219018056 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779373838E-004 Relative difference = 4.193891735414155e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index fc36a399b8..fb9b3d5f50 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:58:54 +DATE: 2024-05-16_14:41:16 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.355693e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.401036e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.406994e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.484635 sec +INFO: No Floating Point Exceptions have been reported + 1,972,569,534 cycles # 2.816 GHz + 2,939,499,932 instructions # 1.49 insn per cycle + 0.757402101 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.619113e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.679355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.682149e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.713739 sec +INFO: No Floating Point Exceptions have been reported + 5,540,767,327 cycles # 2.848 GHz + 11,699,037,597 instructions # 2.11 insn per cycle + 2.001424634 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.574663e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.575966e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.575966e+03 ) sec^-1 -MeanMatrixElemValue = ( 
4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.376595 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.914726e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.915647e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.915647e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.573445 sec INFO: No Floating Point Exceptions have been reported - 22,304,285,207 cycles # 3.497 GHz - 77,752,971,522 instructions # 3.49 insn per cycle - 6.378130573 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3125) (avx2: 0) (512y: 0) (512z: 0) + 24,558,709,341 cycles # 2.863 GHz + 77,854,833,330 instructions # 3.17 insn per cycle + 8.577561930 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3114) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863364631370E-004 -Relative difference = 5.076783822441729e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274866268634797E-004 +Relative difference = 5.630135835748959e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.796224e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.815339e+03 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.815339e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.680504 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.975982e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.989277e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.989277e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.359034 sec INFO: No Floating Point Exceptions have been reported - 5,672,939,486 cycles # 3.373 GHz - 20,080,965,368 instructions # 3.54 insn per cycle - 1.682166460 seconds time elapsed + 6,427,044,365 cycles # 2.721 GHz + 20,086,102,386 instructions # 3.13 insn per cycle + 2.363343503 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861465384638E-004 Relative difference = 2.211071647257023e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.181406e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.190597e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.190597e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.758068 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.504468e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.510257e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.510257e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.097904 sec INFO: No Floating Point Exceptions have been reported - 2,506,172,888 cycles # 3.300 GHz - 7,125,446,367 instructions # 2.84 insn per cycle - 0.759654544 seconds time elapsed + 2,922,790,348 cycles # 2.654 GHz + 7,129,934,034 instructions # 2.44 insn per cycle + 1.101954791 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668077068E-004 Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.425163e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.436500e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.436500e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 
0.682581 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.699271e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.706623e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.706623e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.973114 sec INFO: No Floating Point Exceptions have been reported - 2,257,025,798 cycles # 3.300 GHz - 6,436,084,408 instructions # 2.85 insn per cycle - 0.684182276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11302) (512y: 13) (512z: 0) + 2,595,556,237 cycles # 2.658 GHz + 6,438,662,691 instructions # 2.48 insn per cycle + 0.977341866 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668077068E-004 Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.899548e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.929377e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.929377e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 
0.426829 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.316379e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.321013e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.321013e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.254012 sec INFO: No Floating Point Exceptions have been reported - 1,409,649,726 cycles # 3.293 GHz - 3,419,604,594 instructions # 2.43 insn per cycle - 0.428369002 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2934) (512y: 14) (512z: 9653) + 2,116,081,195 cycles # 1.683 GHz + 3,427,806,501 instructions # 1.62 insn per cycle + 1.258282002 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2924) (512y: 22) (512z: 9654) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952032322112E-004 Relative difference = 3.066639970473621e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 43b17e8e7f..3f8f67a608 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:09:39 +DATE: 2024-05-16_14:55:36 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.546626e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.586411e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.590653e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.493388 sec +INFO: No Floating Point Exceptions have been reported + 2,052,942,224 cycles # 2.851 GHz + 3,071,897,705 instructions # 1.50 insn per cycle + 0.778091403 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.711232e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.769137e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.771784e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.862053 sec +INFO: No Floating Point Exceptions have been reported + 6,039,204,312 cycles # 2.872 GHz + 11,937,016,347 instructions # 1.98 insn per cycle + 2.158356809 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262660579844562E-004 +Relative difference = 2.836238137986709e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.188091e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.188835e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.188835e+02 ) sec^-1 -MeanMatrixElemValue = ( 
4.204932e-01 +- 3.252405e-01 ) GeV^-4 -TOTAL : 26.510008 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.454006e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.454774e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.454774e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 30.077717 sec INFO: No Floating Point Exceptions have been reported - 92,700,828,536 cycles # 3.497 GHz - 134,385,753,101 instructions # 1.45 insn per cycle - 26.511637845 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15522) (avx2: 0) (512y: 0) (512z: 0) + 86,228,096,895 cycles # 2.867 GHz + 135,581,749,205 instructions # 1.57 insn per cycle + 30.081848617 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15593) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275351143091354E-004 -Relative difference = 1.7247609416495452e-08 +Avg ME (F77/C++) = 6.6275351196781740E-004 +Relative difference = 1.805772034719401e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.677211e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.692275e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
8.692275e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 1.896646 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.767198e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.779100e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.779100e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.432356 sec INFO: No Floating Point Exceptions have been reported - 6,557,530,460 cycles # 3.455 GHz - 19,381,795,156 instructions # 2.96 insn per cycle - 1.898268655 seconds time elapsed + 6,776,462,064 cycles # 2.783 GHz + 19,386,992,522 instructions # 2.86 insn per cycle + 2.436630257 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:69681) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274862707273868E-004 Relative difference = 4.0849182767952624e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.740924e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.746776e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.746776e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 0.948430 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.415254e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.420302e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.420302e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 1.167177 sec INFO: No Floating Point Exceptions have been reported - 3,134,223,153 cycles # 3.300 GHz - 6,803,645,460 instructions # 2.17 insn per cycle - 0.950022327 seconds time elapsed + 3,174,327,264 cycles # 2.711 GHz + 6,807,988,001 instructions # 2.14 insn per cycle + 1.171487938 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731558747466E-004 Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.106465e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.115078e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.115078e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 
0.785014 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.702865e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.710950e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.710950e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 0.971352 sec INFO: No Floating Point Exceptions have been reported - 2,595,353,867 cycles # 3.300 GHz - 5,981,224,171 instructions # 2.30 insn per cycle - 0.786569307 seconds time elapsed + 2,641,359,018 cycles # 2.709 GHz + 5,985,956,533 instructions # 2.27 insn per cycle + 0.975633569 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731558747466E-004 Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.297777e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.318841e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.318841e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211848e-01 +- 3.254639e-01 ) GeV^-4 -TOTAL : 
0.503806 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.382138e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.387143e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.387143e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.195051 sec INFO: No Floating Point Exceptions have been reported - 1,665,883,909 cycles # 3.298 GHz - 3,493,968,719 instructions # 2.10 insn per cycle - 0.505369622 seconds time elapsed + 2,079,765,601 cycles # 1.735 GHz + 3,501,460,071 instructions # 1.68 insn per cycle + 1.199295448 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5210) (512y: 3) (512z:44829) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627275e-04 Avg ME (F77/C++) = 6.6272750363879224E-004 Relative difference = 5.490631193034436e-09 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 4f88708a74..f651d28060 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:10:12 +DATE: 2024-05-16_14:56:25 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.511551e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.549792e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.554590e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.495451 sec +INFO: No Floating Point Exceptions have been reported + 2,081,587,427 cycles # 2.833 GHz + 3,058,350,902 instructions # 1.47 insn per cycle + 0.791962629 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.609535e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.664835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.667390e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.882393 sec +INFO: No Floating Point Exceptions have been reported + 6,108,387,546 cycles # 2.880 GHz + 12,340,826,531 instructions # 2.02 insn per cycle + 2.177538628 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262660579844562E-004 +Relative difference = 2.836238137986709e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.280786e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.281547e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.281547e+02 ) sec^-1 -MeanMatrixElemValue = ( 
4.204931e-01 +- 3.252404e-01 ) GeV^-4 -TOTAL : 26.118990 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.501753e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.502516e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.502516e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 29.816879 sec INFO: No Floating Point Exceptions have been reported - 91,385,851,959 cycles # 3.499 GHz - 135,143,127,407 instructions # 1.48 insn per cycle - 26.120638728 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15420) (avx2: 0) (512y: 0) (512z: 0) + 86,332,810,324 cycles # 2.895 GHz + 136,005,056,328 instructions # 1.58 insn per cycle + 29.820915946 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275348903634818E-004 -Relative difference = 1.654257859275456e-08 +Avg ME (F77/C++) = 6.6275348988418387E-004 +Relative difference = 1.5263316105958472e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.477331e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.491555e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
8.491555e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 1.941105 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.665500e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.677672e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.677672e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.468426 sec INFO: No Floating Point Exceptions have been reported - 6,641,629,719 cycles # 3.419 GHz - 19,434,304,508 instructions # 2.93 insn per cycle - 1.942818126 seconds time elapsed + 6,845,942,231 cycles # 2.769 GHz + 19,438,050,467 instructions # 2.84 insn per cycle + 2.472841886 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:69723) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274862764021530E-004 Relative difference = 4.170542995014107e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.794550e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.800752e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.800752e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 0.920220 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.417376e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.422501e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.422501e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 1.165327 sec INFO: No Floating Point Exceptions have been reported - 3,041,217,956 cycles # 3.300 GHz - 6,714,527,084 instructions # 2.21 insn per cycle - 0.921825192 seconds time elapsed + 3,124,352,057 cycles # 2.673 GHz + 6,718,803,660 instructions # 2.15 insn per cycle + 1.169556736 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731651051409E-004 Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.123754e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.132315e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.132315e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 
0.778442 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.691458e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.698947e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.698947e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 0.977716 sec INFO: No Floating Point Exceptions have been reported - 2,573,375,562 cycles # 3.300 GHz - 5,964,316,953 instructions # 2.32 insn per cycle - 0.780039175 seconds time elapsed + 2,637,169,918 cycles # 2.688 GHz + 5,969,286,098 instructions # 2.26 insn per cycle + 0.981942660 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731651051409E-004 Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.311953e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.333717e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.333717e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211848e-01 +- 3.254639e-01 ) GeV^-4 -TOTAL : 
0.501662 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.359514e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.364207e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.364207e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.214640 sec INFO: No Floating Point Exceptions have been reported - 1,656,457,681 cycles # 3.293 GHz - 3,487,542,284 instructions # 2.11 insn per cycle - 0.503287639 seconds time elapsed + 2,077,190,375 cycles # 1.705 GHz + 3,494,266,618 instructions # 1.68 insn per cycle + 1.219012886 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4174) (512y: 4) (512z:44472) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627275e-04 Avg ME (F77/C++) = 6.6272750384530066E-004 Relative difference = 5.80223501432476e-09 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 4df23c6fce..19b36f52e3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:59:06 +DATE: 2024-05-16_14:41:41 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.461685e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.485212e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.487571e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.529512 sec +INFO: No Floating Point Exceptions have been reported + 2,178,837,447 cycles # 2.822 GHz + 3,364,663,947 instructions # 1.54 insn per cycle + 0.830788537 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.126264e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.153475e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154649e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.029127 sec +INFO: No Floating Point Exceptions have been reported + 9,370,132,482 cycles # 2.854 GHz + 19,961,685,193 instructions # 2.13 insn per cycle + 3.339329204 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266732376103494E-004 +Relative difference = 2.659538381540814e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.467395e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.468594e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.468594e+03 ) sec^-1 -MeanMatrixElemValue = ( 
4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.654282 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.814801e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.815638e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.815638e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 9.046315 sec INFO: No Floating Point Exceptions have been reported - 23,281,892,879 cycles # 3.498 GHz - 79,277,659,651 instructions # 3.41 insn per cycle - 6.655931634 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4801) (avx2: 0) (512y: 0) (512z: 0) + 25,898,843,827 cycles # 2.862 GHz + 79,438,691,532 instructions # 3.07 insn per cycle + 9.050548799 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4858) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.734185e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.738541e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.738541e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
3.471801 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.427576e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.430583e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.430583e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.794109 sec INFO: No Floating Point Exceptions have been reported - 11,461,390,223 cycles # 3.300 GHz - 38,542,705,401 instructions # 3.36 insn per cycle - 3.473446260 seconds time elapsed + 12,707,110,349 cycles # 2.649 GHz + 38,549,995,901 instructions # 3.03 insn per cycle + 4.798432518 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13163) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.159663e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.162119e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.162119e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.421427 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.947160e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.962973e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.962973e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.073357 sec INFO: No Floating Point Exceptions have been reported - 4,695,608,400 cycles # 3.300 GHz - 13,474,466,372 instructions # 2.87 insn per cycle - 1.423141317 seconds time elapsed + 5,517,673,615 cycles # 2.658 GHz + 13,479,814,632 instructions # 2.44 insn per cycle + 2.077628129 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.283073e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.286139e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.286139e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
1.285513 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.092853e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.113453e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.113453e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.813039 sec INFO: No Floating Point Exceptions have been reported - 4,247,114,233 cycles # 3.300 GHz - 12,145,146,492 instructions # 2.86 insn per cycle - 1.287225342 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10237) (512y: 45) (512z: 0) + 4,828,852,439 cycles # 2.658 GHz + 12,135,084,334 instructions # 2.51 insn per cycle + 1.817332368 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.009907e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.017373e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.017373e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
0.823451 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.671146e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.682190e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.682190e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.468744 sec INFO: No Floating Point Exceptions have been reported - 2,715,458,356 cycles # 3.292 GHz - 6,302,170,709 instructions # 2.32 insn per cycle - 0.825158510 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1807) (512y: 61) (512z: 9355) + 4,141,507,976 cycles # 1.676 GHz + 6,337,241,929 instructions # 1.53 insn per cycle + 2.472886901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1803) (512y: 93) (512z: 9358) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index 54fe1ccb0f..10c707e81e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:59:22 +DATE: 2024-05-16_14:42:15 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.482619e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.506518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508744e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.530792 sec +INFO: No Floating Point Exceptions have been reported + 2,175,803,522 cycles # 2.817 GHz + 3,378,965,043 instructions # 1.55 insn per cycle + 0.832396723 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.148343e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.175835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.177034e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.021680 sec +INFO: No Floating Point Exceptions have been reported + 9,373,127,330 cycles # 2.855 GHz + 21,008,547,067 instructions # 2.24 insn per cycle + 3.339100414 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266732376103494E-004 +Relative difference = 2.659538381540814e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.464129e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.465346e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.465346e+03 ) sec^-1 -MeanMatrixElemValue = ( 
4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.663279 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.816314e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.817146e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.817146e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 9.038908 sec INFO: No Floating Point Exceptions have been reported - 23,317,642,584 cycles # 3.499 GHz - 79,208,074,098 instructions # 3.40 insn per cycle - 6.664923308 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4465) (avx2: 0) (512y: 0) (512z: 0) + 25,883,565,851 cycles # 2.863 GHz + 79,454,182,113 instructions # 3.07 insn per cycle + 9.043041112 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4505) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.686297e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.690537e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.690537e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
3.506677 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.444359e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.447378e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.447378e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.770841 sec INFO: No Floating Point Exceptions have been reported - 11,576,560,337 cycles # 3.300 GHz - 38,482,658,385 instructions # 3.32 insn per cycle - 3.508299179 seconds time elapsed + 12,673,930,469 cycles # 2.656 GHz + 38,521,208,960 instructions # 3.04 insn per cycle + 4.775036357 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:12930) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.148090e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.150478e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.150478e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.436063 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.869925e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.885399e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.885399e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.093382 sec INFO: No Floating Point Exceptions have been reported - 4,742,292,128 cycles # 3.300 GHz - 13,586,331,325 instructions # 2.86 insn per cycle - 1.437759750 seconds time elapsed + 5,571,627,209 cycles # 2.657 GHz + 13,607,217,607 instructions # 2.44 insn per cycle + 2.097652206 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.271453e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.274430e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.274430e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
1.297103 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.920636e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.941759e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.941759e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.847928 sec INFO: No Floating Point Exceptions have been reported - 4,285,298,964 cycles # 3.300 GHz - 12,257,011,991 instructions # 2.86 insn per cycle - 1.298819689 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10175) (512y: 225) (512z: 0) + 4,911,801,030 cycles # 2.653 GHz + 12,271,296,407 instructions # 2.50 insn per cycle + 1.852091714 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.997853e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.005253e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.005253e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
0.828105 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.658898e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.670376e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.670376e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.472418 sec INFO: No Floating Point Exceptions have been reported - 2,734,224,299 cycles # 3.296 GHz - 6,405,345,797 instructions # 2.34 insn per cycle - 0.829773255 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1639) (512y: 181) (512z: 9355) + 4,148,038,447 cycles # 1.675 GHz + 6,442,551,576 instructions # 1.55 insn per cycle + 2.476725391 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1628) (512y: 191) (512z: 9356) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index fc32cd9504..5b5bd116a3 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:00:02 +DATE: 2024-05-16_14:44:06 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.065678e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.066059e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.066269e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.433623 sec +INFO: No Floating Point Exceptions have been reported + 7,848,341,490 cycles # 2.847 GHz + 17,462,165,188 instructions # 2.22 insn per cycle + 2.813787845 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.279047e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.280919e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.281197e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.985795 sec +INFO: No Floating Point Exceptions have been reported + 12,317,841,072 cycles # 2.855 GHz + 29,065,647,551 instructions # 2.36 insn per cycle + 4.369250222 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.974428e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.974682e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.974682e+01 ) sec^-1 -MeanMatrixElemValue 
= ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.296181 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.769533e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.769744e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.769744e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.808252 sec INFO: No Floating Point Exceptions have been reported - 18,531,410,716 cycles # 3.498 GHz - 53,656,558,692 instructions # 2.90 insn per cycle - 5.297695109 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32534) (avx2: 0) (512y: 0) (512z: 0) + 18,798,528,369 cycles # 2.760 GHz + 53,916,630,138 instructions # 2.87 insn per cycle + 6.812355714 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.903537e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.903625e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.903625e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 2.776054 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.539980e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.540062e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.540062e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.433152 sec INFO: No Floating Point Exceptions have been reported - 9,714,685,657 cycles # 3.498 GHz - 27,085,095,455 instructions # 2.79 insn per cycle - 2.777591504 seconds time elapsed + 9,799,231,624 cycles # 2.852 GHz + 27,092,581,938 instructions # 2.76 insn per cycle + 3.437235180 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.267474e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.267897e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.267897e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 1.239278 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.326889e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.327284e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.327284e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.592072 sec INFO: No Floating Point Exceptions have been reported - 4,093,982,090 cycles # 3.300 GHz - 9,554,590,072 instructions # 2.33 insn per cycle - 1.240820628 seconds time elapsed + 4,220,179,984 cycles # 2.645 GHz + 9,560,887,701 instructions # 2.27 insn per cycle + 1.596045022 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.917663e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.918229e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.918229e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 1.075663 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.770010e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.770613e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.770613e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.405649 sec INFO: No Floating Point Exceptions have been reported - 3,553,974,436 cycles # 3.300 GHz - 8,479,633,680 instructions # 2.39 insn per cycle - 1.077213977 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80074) (512y: 57) (512z: 0) + 3,726,923,548 cycles # 2.645 GHz + 8,484,897,516 instructions # 2.28 insn per cycle + 1.409716339 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.603814e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.605597e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.605597e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 0.616422 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.281739e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.282258e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.282258e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.614138 sec INFO: No Floating Point Exceptions have been reported - 2,035,813,701 cycles # 3.296 GHz - 4,263,524,895 instructions # 2.09 insn per cycle - 0.617919481 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2288) (512y: 73) (512z:79102) + 2,690,163,143 cycles # 1.663 GHz + 4,272,866,756 instructions # 1.59 insn per cycle + 1.618172762 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 3fce28479c..1be1cfeedf 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,188 +1,246 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:14:01 +DATE: 2024-05-16_15:03:52 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.063403e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.064386e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.064386e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.395260 sec +INFO: No Floating Point Exceptions have been reported + 7,755,019,436 cycles # 2.852 GHz + 17,230,726,903 instructions # 2.22 insn per cycle + 2.775067655 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.263412e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.297943e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.297943e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.971807 sec +INFO: No Floating Point Exceptions have been reported + 12,283,103,403 cycles # 2.855 GHz + 27,758,308,143 instructions # 2.26 insn per cycle + 4.357937638 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.977506e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.977760e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.977760e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.294370 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.361041e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.361236e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.361236e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.177191 sec INFO: No Floating Point Exceptions have been reported - 18,525,569,019 cycles # 3.498 GHz - 53,657,433,656 instructions # 2.90 insn per cycle - 5.295871836 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32534) (avx2: 0) (512y: 0) (512z: 0) + 18,941,402,140 cycles # 2.638 GHz + 53,918,413,850 instructions # 2.85 insn per cycle + 7.181202320 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.904848e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.904938e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.904938e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.774505 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.538220e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.538308e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.538308e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.437404 sec INFO: No Floating Point Exceptions have been reported - 9,711,671,203 cycles # 3.499 GHz - 27,086,160,123 instructions # 2.79 insn per cycle - 2.775998605 seconds time elapsed + 9,826,263,323 cycles # 2.856 GHz + 27,093,421,705 instructions # 2.76 insn per cycle + 3.441586183 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.268708e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.269153e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.269153e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.238971 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.320877e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.321321e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.321321e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.595267 sec INFO: No Floating Point Exceptions have been reported - 4,093,903,617 cycles # 3.300 GHz - 9,555,873,980 instructions # 2.33 insn per cycle - 1.240497644 seconds time elapsed + 4,226,325,559 cycles # 2.644 GHz + 9,562,000,988 instructions # 2.26 insn per cycle + 1.599357751 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.921180e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.921761e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.921761e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.074913 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.768698e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.769266e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.769266e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.406299 sec INFO: No Floating Point Exceptions have been reported - 3,551,316,418 cycles # 3.300 GHz - 8,480,411,726 instructions # 2.39 insn per cycle - 1.076454882 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80074) (512y: 57) (512z: 0) + 3,728,202,948 cycles # 2.645 GHz + 8,485,828,873 instructions # 2.28 insn per cycle + 1.410433353 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.564439e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.566215e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.566215e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.619417 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.279021e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.279511e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.279511e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.615850 sec INFO: No Floating Point Exceptions have been reported - 2,045,719,951 cycles # 3.296 GHz - 4,264,571,385 instructions # 2.08 insn per cycle - 0.620945943 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2288) (512y: 73) (512z:79102) + 2,693,497,833 cycles # 1.663 GHz + 4,273,840,765 instructions # 1.59 insn per cycle + 1.620067219 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index bd409ca461..3f519fda03 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:00:24 +DATE: 2024-05-16_14:45:01 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.065431e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.065819e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.066015e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.431749 sec +INFO: No Floating Point Exceptions have been reported + 7,858,443,167 cycles # 2.852 GHz + 17,797,449,482 instructions # 2.26 insn per cycle + 2.810886675 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.189050e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.190883e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.191132e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.007219 sec +INFO: No Floating Point Exceptions have been reported + 12,388,147,716 cycles # 2.856 GHz + 29,572,084,158 instructions # 2.39 insn per cycle + 4.393669645 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.001907e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.001932e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.001932e+02 ) sec^-1 -MeanMatrixElemValue 
= ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.272707 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.555801e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.556015e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.556015e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.995892 sec INFO: No Floating Point Exceptions have been reported - 18,450,607,097 cycles # 3.499 GHz - 53,658,226,288 instructions # 2.91 insn per cycle - 5.274217478 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32178) (avx2: 0) (512y: 0) (512z: 0) + 18,868,341,179 cycles # 2.696 GHz + 53,930,114,085 instructions # 2.86 insn per cycle + 6.999840535 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32063) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.912922e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.913010e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.913010e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 2.762771 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.547899e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.547983e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.547983e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.415669 sec INFO: No Floating Point Exceptions have been reported - 9,669,007,974 cycles # 3.499 GHz - 27,071,852,156 instructions # 2.80 insn per cycle - 2.764282785 seconds time elapsed + 9,762,163,827 cycles # 2.856 GHz + 27,089,755,364 instructions # 2.77 insn per cycle + 3.419663266 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96286) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.249552e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.249989e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.249989e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 1.244153 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.328520e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.328923e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328923e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.591769 sec INFO: No Floating Point Exceptions have been reported - 4,109,934,644 cycles # 3.300 GHz - 9,549,086,138 instructions # 2.32 insn per cycle - 1.245663580 seconds time elapsed + 4,217,350,816 cycles # 2.647 GHz + 9,560,856,496 instructions # 2.27 insn per cycle + 1.595749154 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.860053e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.860597e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.860597e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 1.088189 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.765235e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765802e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765802e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.407302 sec INFO: No Floating Point Exceptions have been reported - 3,595,363,738 cycles # 3.300 GHz - 8,473,870,494 instructions # 2.36 insn per cycle - 1.089783231 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80046) (512y: 227) (512z: 0) + 3,737,969,275 cycles # 2.650 GHz + 8,484,674,655 instructions # 2.27 insn per cycle + 1.411406372 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.547370e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.549049e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.549049e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 0.620509 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.273845e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.274322e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.274322e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.617639 sec INFO: No Floating Point Exceptions have been reported - 2,049,145,259 cycles # 3.296 GHz - 4,264,452,480 instructions # 2.08 insn per cycle - 0.621971347 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2180) (512y: 177) (512z:79109) + 2,695,774,477 cycles # 1.663 GHz + 4,276,120,388 instructions # 1.59 insn per cycle + 1.621698890 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 816b77a603..8097294660 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:00:45 +DATE: 2024-05-16_14:45:55 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.560287e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.561087e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.561509e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.688313 sec +INFO: No Floating Point Exceptions have been reported + 5,610,919,333 cycles # 2.843 GHz + 12,076,970,192 instructions # 2.15 insn per cycle + 2.032164963 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.335524e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.336187e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.336332e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 +TOTAL : 1.920912 sec +INFO: No Floating Point Exceptions have been reported + 6,262,064,127 cycles # 2.846 GHz + 13,866,454,713 instructions # 2.21 insn per cycle + 2.256561773 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849636e-03 +Avg ME (F77/GPU) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.035543e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.035570e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.035570e+02 ) sec^-1 -MeanMatrixElemValue = 
( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 5.101302 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.473644e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.473896e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.473896e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.236471 sec INFO: No Floating Point Exceptions have been reported - 17,846,801,084 cycles # 3.498 GHz - 53,421,326,194 instructions # 2.99 insn per cycle - 5.102780488 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20403) (avx2: 0) (512y: 0) (512z: 0) + 17,834,532,335 cycles # 2.858 GHz + 53,589,179,622 instructions # 3.00 insn per cycle + 6.240522901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087545108E-003 -Relative difference = 2.11977393295785e-08 +Avg ME (F77/C++) = 9.8479612087541066E-003 +Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.202236e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.202683e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 4.202683e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 -TOTAL : 1.258815 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.311314e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.311704e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.311704e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.599839 sec INFO: No Floating Point Exceptions have been reported - 4,406,545,916 cycles # 3.497 GHz - 13,755,398,365 instructions # 3.12 insn per cycle - 1.260309156 seconds time elapsed + 4,578,829,094 cycles # 2.856 GHz + 13,761,810,246 instructions # 3.01 insn per cycle + 1.603811766 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896527003E-003 Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.467032e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.468910e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.468910e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL 
: 0.625447 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.636666e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.638274e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.638274e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.800488 sec INFO: No Floating Point Exceptions have been reported - 2,068,444,810 cycles # 3.300 GHz - 4,810,294,241 instructions # 2.33 insn per cycle - 0.626978760 seconds time elapsed + 2,129,570,848 cycles # 2.649 GHz + 4,816,093,977 instructions # 2.26 insn per cycle + 0.804523713 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.742997e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.745348e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.745348e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 
-TOTAL : 0.543809 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.627354e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.629498e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.629498e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.697299 sec INFO: No Floating Point Exceptions have been reported - 1,798,807,079 cycles # 3.300 GHz - 4,267,865,954 instructions # 2.37 insn per cycle - 0.545350839 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80691) (512y: 12) (512z: 0) + 1,857,131,979 cycles # 2.651 GHz + 4,273,320,598 instructions # 2.30 insn per cycle + 0.701213399 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.712642e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.713378e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.713378e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 
-TOTAL : 0.310824 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.540089e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.542023e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.542023e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.812936 sec INFO: No Floating Point Exceptions have been reported - 1,027,302,503 cycles # 3.292 GHz - 2,150,832,502 instructions # 2.09 insn per cycle - 0.312380920 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2903) (512y: 17) (512z:79304) + 1,360,618,833 cycles # 1.668 GHz + 2,159,125,772 instructions # 1.59 insn per cycle + 0.816997353 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982958280E-003 Relative difference = 2.0044092642523172e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 269ddf269d..6d352d97ac 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,188 +1,246 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:14:22 +DATE: 2024-05-16_15:04:47 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.582684e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.584567e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.584567e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 +TOTAL : 1.633735 sec +INFO: No Floating Point Exceptions have been reported + 5,453,177,396 cycles # 2.847 GHz + 11,627,188,509 instructions # 2.13 insn per cycle + 1.971399165 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.292389e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.306150e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.306150e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 +TOTAL : 1.932378 sec +INFO: No Floating Point Exceptions have been reported + 6,313,737,946 cycles # 2.848 GHz + 13,568,150,990 instructions # 2.15 insn per cycle + 2.274068662 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849636e-03 +Avg ME (F77/GPU) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.035650e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.035679e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.035679e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 5.100801 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.447529e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.447785e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.447785e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.255519 sec INFO: No Floating Point Exceptions have been reported - 17,851,268,457 cycles # 3.499 GHz - 53,422,135,092 instructions # 2.99 insn per cycle - 5.102295233 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20403) (avx2: 0) (512y: 0) (512z: 0) + 17,871,844,477 cycles # 2.856 GHz + 53,590,423,890 instructions # 3.00 insn per cycle + 6.259496797 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087545108E-003 -Relative difference = 2.11977393295785e-08 +Avg ME (F77/C++) = 9.8479612087541066E-003 +Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.202973e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.203448e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.203448e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 -TOTAL : 1.259077 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.319002e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.319403e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.319403e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.595862 sec INFO: No Floating Point Exceptions have been reported - 4,407,381,529 cycles # 3.497 GHz - 13,756,329,596 instructions # 3.12 insn per cycle - 1.260579726 seconds time elapsed + 4,573,738,949 cycles # 2.860 GHz + 13,762,785,828 instructions # 3.01 insn per cycle + 1.599904345 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896527003E-003 Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.454907e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.456799e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.456799e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.626446 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.613525e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.615218e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.615218e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.804350 sec INFO: No Floating Point Exceptions have been reported - 2,071,381,535 cycles # 3.300 GHz - 4,811,292,358 instructions # 2.32 insn per cycle - 0.627929179 seconds time elapsed + 2,139,167,872 cycles # 2.648 GHz + 4,817,111,626 instructions # 2.25 insn per cycle + 0.808508083 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.733338e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.735696e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.735696e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.544424 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.603124e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.605216e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.605216e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.699524 sec INFO: No Floating Point Exceptions have been reported - 1,800,615,366 cycles # 3.300 GHz - 4,268,858,978 instructions # 2.37 insn per cycle - 0.545874505 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80691) (512y: 12) (512z: 0) + 1,862,402,974 cycles # 2.650 GHz + 4,274,167,467 instructions # 2.29 insn per cycle + 0.703628099 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070551E-003 Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.708326e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.709082e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.709082e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 -TOTAL : 0.311599 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.580110e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.582197e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.582197e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.807828 sec INFO: No Floating Point Exceptions have been reported - 1,028,673,441 cycles # 3.288 GHz - 2,151,830,707 instructions # 2.09 insn per cycle - 0.313093887 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2903) (512y: 17) (512z:79304) + 1,354,037,726 cycles # 1.669 GHz + 2,159,114,420 instructions # 1.59 insn per cycle + 0.811949308 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982958280E-003 Relative difference = 2.0044092642523172e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 2f40c9b08b..2d3f5a3740 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:01:01 +DATE: 2024-05-16_14:46:34 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.539024e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.539847e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.540280e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.689103 sec +INFO: No Floating Point Exceptions have been reported + 5,627,474,622 cycles # 2.847 GHz + 11,923,534,222 instructions # 2.12 insn per cycle + 2.035228412 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.303686e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.304329e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.304469e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 +TOTAL : 1.932451 sec +INFO: No Floating Point Exceptions have been reported + 6,311,455,519 cycles # 2.848 GHz + 13,762,708,375 instructions # 2.18 insn per cycle + 2.272906437 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849636e-03 +Avg ME (F77/GPU) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.038056e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.038085e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.038085e+02 ) sec^-1 -MeanMatrixElemValue = 
( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 5.089824 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.477402e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.477656e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.477656e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.233047 sec INFO: No Floating Point Exceptions have been reported - 17,811,569,824 cycles # 3.498 GHz - 53,427,844,878 instructions # 3.00 insn per cycle - 5.091356551 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20415) (avx2: 0) (512y: 0) (512z: 0) + 17,803,580,317 cycles # 2.855 GHz + 53,580,069,164 instructions # 3.01 insn per cycle + 6.237030677 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087257751E-003 -Relative difference = 2.119482139617284e-08 +Avg ME (F77/C++) = 9.8479612087582491E-003 +Relative difference = 2.1198118933954545e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.248857e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.249314e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 4.249314e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 -TOTAL : 1.245315 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.307415e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.307805e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.307805e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.601039 sec INFO: No Floating Point Exceptions have been reported - 4,359,180,439 cycles # 3.497 GHz - 13,748,783,981 instructions # 3.15 insn per cycle - 1.246825643 seconds time elapsed + 4,572,009,891 cycles # 2.850 GHz + 13,755,353,111 instructions # 3.01 insn per cycle + 1.605120576 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896225560E-003 Relative difference = 3.151694379513441e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.483628e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.485527e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.485527e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL 
: 0.624141 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.585961e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.587683e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.587683e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.806516 sec INFO: No Floating Point Exceptions have been reported - 2,063,901,123 cycles # 3.300 GHz - 4,812,481,687 instructions # 2.33 insn per cycle - 0.625624461 seconds time elapsed + 2,141,220,761 cycles # 2.644 GHz + 4,818,439,860 instructions # 2.25 insn per cycle + 0.810543510 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070967E-003 Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.780575e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.782924e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.782924e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 
-TOTAL : 0.541603 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.583228e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.585349e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.585349e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.701072 sec INFO: No Floating Point Exceptions have been reported - 1,791,446,942 cycles # 3.300 GHz - 4,269,554,140 instructions # 2.38 insn per cycle - 0.543134521 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81101) (512y: 12) (512z: 0) + 1,870,651,613 cycles # 2.656 GHz + 4,275,203,774 instructions # 2.29 insn per cycle + 0.705038579 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161070967E-003 Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.714704e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.715434e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.715434e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 
-TOTAL : 0.310403 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.570140e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.572065e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.572065e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.808794 sec INFO: No Floating Point Exceptions have been reported - 1,025,275,826 cycles # 3.290 GHz - 2,157,343,591 instructions # 2.10 insn per cycle - 0.311929273 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3497) (512y: 26) (512z:79498) + 1,356,929,556 cycles # 1.671 GHz + 2,164,613,956 instructions # 1.60 insn per cycle + 0.812781092 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3487) (512y: 34) (512z:79499) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982955140E-003 Relative difference = 2.0044060904369713e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 1b5ab77bc1..dfab5870bc 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:01:17 +DATE: 2024-05-16_14:47:13 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.689455e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.689959e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.690212e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.179203 sec +INFO: No Floating Point Exceptions have been reported + 7,126,539,551 cycles # 2.849 GHz + 15,807,759,758 instructions # 2.22 insn per cycle + 2.559686036 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.107889e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108154e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108192e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.424856 sec +INFO: No Floating Point Exceptions have been reported + 10,729,635,772 cycles # 2.852 GHz + 25,204,058,412 instructions # 2.35 insn per cycle + 3.820430433 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.859763e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.860008e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.860008e+01 ) sec^-1 -MeanMatrixElemValue 
= ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.357830 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.303415e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.303607e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.303607e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.238072 sec INFO: No Floating Point Exceptions have been reported - 18,745,138,030 cycles # 3.498 GHz - 53,894,742,330 instructions # 2.88 insn per cycle - 5.359343965 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32196) (avx2: 0) (512y: 0) (512z: 0) + 19,150,406,884 cycles # 2.645 GHz + 54,154,394,762 instructions # 2.83 insn per cycle + 7.242308052 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32067) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.989274e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.989371e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.989371e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 2.657296 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.497154e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.497235e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.497235e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.531757 sec INFO: No Floating Point Exceptions have been reported - 9,296,382,189 cycles # 3.497 GHz - 26,144,686,060 instructions # 2.81 insn per cycle - 2.658817676 seconds time elapsed + 9,343,938,644 cycles # 2.643 GHz + 26,158,830,842 instructions # 2.80 insn per cycle + 3.535758073 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96007) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.475685e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.476158e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.476158e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 1.181640 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.453828e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.454275e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.454275e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.534198 sec INFO: No Floating Point Exceptions have been reported - 3,903,384,014 cycles # 3.300 GHz - 9,214,863,642 instructions # 2.36 insn per cycle - 1.183100156 seconds time elapsed + 4,069,691,610 cycles # 2.648 GHz + 9,228,168,046 instructions # 2.27 insn per cycle + 1.538179495 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.128514e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.129121e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.129121e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 1.031444 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.986335e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.986931e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.986931e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.329366 sec INFO: No Floating Point Exceptions have been reported - 3,407,965,790 cycles # 3.300 GHz - 8,163,219,670 instructions # 2.40 insn per cycle - 1.032920025 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79927) (512y: 45) (512z: 0) + 3,528,184,184 cycles # 2.647 GHz + 8,174,614,993 instructions # 2.32 insn per cycle + 1.333451918 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.856196e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.858028e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.858028e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 0.598999 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.367967e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.368468e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.368468e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.573046 sec INFO: No Floating Point Exceptions have been reported - 1,975,737,268 cycles # 3.292 GHz - 4,145,284,175 instructions # 2.10 insn per cycle - 0.600485726 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2050) (512y: 61) (512z:78757) + 2,618,946,865 cycles # 1.661 GHz + 4,154,480,374 instructions # 1.59 insn per cycle + 1.577054610 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2046) (512y: 93) (512z:78760) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 28f52b50f2..3ddfb4805b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:01:38 +DATE: 2024-05-16_14:48:06 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.679279e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.679786e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.680054e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.178048 sec +INFO: No Floating Point Exceptions have been reported + 7,134,880,740 cycles # 2.851 GHz + 15,434,594,866 instructions # 2.16 insn per cycle + 2.558453633 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.104221e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.104483e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.104525e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.411600 sec +INFO: No Floating Point Exceptions have been reported + 10,672,973,002 cycles # 2.855 GHz + 24,521,846,399 instructions # 2.30 insn per cycle + 3.794724712 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.878190e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.878445e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.878445e+01 ) sec^-1 -MeanMatrixElemValue 
= ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.347497 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.893217e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.893439e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.893439e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.694276 sec INFO: No Floating Point Exceptions have been reported - 18,713,748,882 cycles # 3.499 GHz - 53,885,198,082 instructions # 2.88 insn per cycle - 5.348966480 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32348) (avx2: 0) (512y: 0) (512z: 0) + 19,121,414,788 cycles # 2.855 GHz + 54,156,458,090 instructions # 2.83 insn per cycle + 6.698138270 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32244) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.975955e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.976053e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.976053e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 2.674536 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.495395e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.495480e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.495480e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.535528 sec INFO: No Floating Point Exceptions have been reported - 9,357,026,662 cycles # 3.497 GHz - 26,063,941,669 instructions # 2.79 insn per cycle - 2.676039271 seconds time elapsed + 9,391,010,006 cycles # 2.654 GHz + 26,079,707,862 instructions # 2.78 insn per cycle + 3.539600596 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:95901) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.467419e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.467877e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.467877e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 1.183646 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.518532e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.518969e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.518969e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.505616 sec INFO: No Floating Point Exceptions have been reported - 3,910,407,013 cycles # 3.300 GHz - 9,200,492,907 instructions # 2.35 insn per cycle - 1.185220543 seconds time elapsed + 4,001,150,405 cycles # 2.652 GHz + 9,212,868,850 instructions # 2.30 insn per cycle + 1.509560632 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.090014e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.090640e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.090640e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 1.039104 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.985927e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.986486e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.986486e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.329709 sec INFO: No Floating Point Exceptions have been reported - 3,433,257,963 cycles # 3.300 GHz - 8,155,419,511 instructions # 2.38 insn per cycle - 1.040631977 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79383) (512y: 215) (512z: 0) + 3,529,740,112 cycles # 2.648 GHz + 8,168,252,869 instructions # 2.31 insn per cycle + 1.333651402 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.836430e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.838249e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.838249e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 0.600012 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.359545e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.360045e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.360045e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.576721 sec INFO: No Floating Point Exceptions have been reported - 1,982,102,437 cycles # 3.297 GHz - 4,144,296,349 instructions # 2.09 insn per cycle - 0.601505064 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1504) (512y: 165) (512z:78775) + 2,623,702,370 cycles # 1.660 GHz + 4,153,356,804 instructions # 1.58 insn per cycle + 1.580839869 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1493) (512y: 175) (512z:78776) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index c79abb2bfe..38bc670a18 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:59:37 +DATE: 2024-05-16_14:42:48 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.927387e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.315718e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.634653e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.452451 sec +INFO: No Floating Point Exceptions have been reported + 1,879,085,625 cycles # 2.815 GHz + 2,632,406,951 instructions # 1.40 insn per cycle + 0.724903288 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.675522e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.208336e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.557322e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.534593 sec +INFO: No Floating Point Exceptions have been reported + 2,165,913,457 cycles # 2.812 GHz + 3,139,398,529 instructions # 1.45 insn per cycle + 0.827804422 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.386099e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.414360e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.414360e+05 ) sec^-1 -MeanMatrixElemValue = ( 
1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.207395 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.011560e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033153e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033153e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.642980 sec INFO: No Floating Point Exceptions have been reported - 4,225,513,131 cycles # 3.496 GHz - 13,434,518,635 instructions # 3.18 insn per cycle - 1.209109512 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 864) (avx2: 0) (512y: 0) (512z: 0) + 4,710,402,412 cycles # 2.861 GHz + 13,462,495,012 instructions # 2.86 insn per cycle + 1.647108070 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499484 Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.528838e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.623080e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.623080e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.672337 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.839775e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910542e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910542e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.913158 sec INFO: No Floating Point Exceptions have been reported - 2,355,454,188 cycles # 3.496 GHz - 7,541,752,748 instructions # 3.20 insn per cycle - 0.673997979 seconds time elapsed + 2,620,816,977 cycles # 2.859 GHz + 7,551,970,333 instructions # 2.88 insn per cycle + 0.917276709 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499478 Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.526365e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.811448e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.811448e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.385795 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.970408e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.156692e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.156692e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.574933 sec INFO: No Floating Point Exceptions have been reported - 1,283,679,728 cycles # 3.315 GHz - 3,104,813,956 instructions # 2.42 insn per cycle - 0.387468424 seconds time elapsed + 1,480,758,822 cycles # 2.560 GHz + 3,119,703,419 instructions # 2.11 insn per cycle + 0.579132992 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.658264e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.962079e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.962079e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.376150 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.428889e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.682851e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.682851e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.501020 sec INFO: No Floating Point Exceptions have been reported - 1,251,723,471 cycles # 3.316 GHz - 2,979,643,067 instructions # 2.38 insn per cycle - 0.377805001 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2745) (512y: 84) (512z: 0) + 1,347,520,276 cycles # 2.670 GHz + 2,981,434,055 instructions # 2.21 insn per cycle + 0.505363497 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.340381e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.737920e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.737920e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.331292 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.241546e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347861e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347861e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.756225 sec INFO: No Floating Point Exceptions have been reported - 1,100,815,129 cycles # 3.309 GHz - 1,919,723,094 instructions # 1.74 insn per cycle - 0.332945646 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1365) (512y: 84) (512z: 2171) + 1,330,320,612 cycles # 1.751 GHz + 1,953,406,018 instructions # 1.47 insn per cycle + 0.760489864 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 51d9abbcc0..6f141963c0 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,188 +1,246 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_15:13:25 +DATE: 2024-05-16_15:02:27 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.428295e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.103056e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.103056e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.480636 sec +INFO: No Floating Point Exceptions have been reported + 1,959,891,585 cycles # 2.818 GHz + 2,927,619,706 instructions # 1.49 insn per cycle + 0.752080667 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.157968e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.371122e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.371122e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.764001 sec +INFO: No Floating Point Exceptions have been reported + 2,873,640,599 cycles # 2.829 GHz + 4,407,079,803 instructions # 1.53 insn per cycle + 1.073816079 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.383278e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.411625e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.411625e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.214078 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.008642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.030371e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.030371e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.654853 sec INFO: No Floating Point Exceptions have been reported - 4,247,198,847 cycles # 3.494 GHz - 13,441,802,925 instructions # 3.16 insn per cycle - 1.216035831 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 864) (avx2: 0) (512y: 0) (512z: 0) + 4,747,034,662 cycles # 2.862 GHz + 13,469,694,473 instructions # 2.84 insn per cycle + 1.659302078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499484 Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.521394e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.615345e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.615345e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.678956 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.820348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.892211e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.892211e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.930951 sec INFO: No Floating Point Exceptions have been reported - 2,376,586,422 cycles # 3.492 GHz - 7,591,894,788 instructions # 3.19 insn per cycle - 0.680920167 seconds time elapsed + 2,665,977,292 cycles # 2.852 GHz + 7,601,998,240 instructions # 2.85 insn per cycle + 0.935555380 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499478 Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.507955e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.788891e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.788891e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.392379 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.068620e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.272960e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.272960e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.564348 sec INFO: No Floating Point Exceptions have been reported - 1,307,070,860 cycles # 3.317 GHz - 3,156,514,594 instructions # 2.41 insn per cycle - 0.394336567 seconds time elapsed + 1,513,664,570 cycles # 2.669 GHz + 3,168,463,518 instructions # 2.09 insn per cycle + 0.568761168 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.627192e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.926583e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.926583e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.383626 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.408389e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.655047e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.655047e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.511104 sec INFO: No Floating Point Exceptions have been reported - 1,278,297,344 cycles # 3.318 GHz - 3,030,841,272 instructions # 2.37 insn per cycle - 0.385587020 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2745) (512y: 84) (512z: 0) + 1,377,582,779 cycles # 2.675 GHz + 3,030,644,125 instructions # 2.20 insn per cycle + 0.515560343 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.313342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.706568e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.706568e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.338091 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.221799e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.329402e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.329402e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.769820 sec INFO: No Floating Point Exceptions have been reported - 1,125,703,318 cycles # 3.313 GHz - 1,959,620,458 instructions # 1.74 insn per cycle - 0.340051363 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1365) (512y: 84) (512z: 2171) + 1,366,102,927 cycles # 1.765 GHz + 1,991,071,116 instructions # 1.46 insn per cycle + 0.774386560 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 68fb87c782..25b8d3c885 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:59:42 +DATE: 2024-05-16_14:43:01 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.907170e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.197971e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.504611e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.455771 sec +INFO: No Floating Point Exceptions have been reported + 1,881,865,516 cycles # 2.813 GHz + 2,669,782,801 instructions # 1.42 insn per cycle + 0.727786761 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.641992e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.081273e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.416654e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.532301 sec +INFO: No Floating Point Exceptions have been reported + 2,167,822,822 cycles # 2.823 GHz + 3,120,353,321 instructions # 1.44 insn per cycle + 0.825343283 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.397006e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.425726e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.425726e+05 ) sec^-1 -MeanMatrixElemValue = ( 
1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.198001 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.007784e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.029112e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.029112e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.648995 sec INFO: No Floating Point Exceptions have been reported - 4,195,031,738 cycles # 3.498 GHz - 13,428,169,197 instructions # 3.20 insn per cycle - 1.199627126 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 853) (avx2: 0) (512y: 0) (512z: 0) + 4,725,323,359 cycles # 2.860 GHz + 13,457,369,308 instructions # 2.85 insn per cycle + 1.653142214 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499484 Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.509751e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.602059e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.602059e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.676983 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.833913e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.904030e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.904030e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.915889 sec INFO: No Floating Point Exceptions have been reported - 2,371,091,046 cycles # 3.495 GHz - 7,538,133,395 instructions # 3.18 insn per cycle - 0.678626428 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3094) (avx2: 0) (512y: 0) (512z: 0) + 2,628,184,982 cycles # 2.858 GHz + 7,551,273,836 instructions # 2.87 insn per cycle + 0.920086997 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499478 Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.525255e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.811956e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.811956e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.385964 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.116183e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.320457e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.320457e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.548674 sec INFO: No Floating Point Exceptions have been reported - 1,284,306,935 cycles # 3.315 GHz - 3,102,646,201 instructions # 2.42 insn per cycle - 0.387645337 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2901) (512y: 0) (512z: 0) + 1,476,841,675 cycles # 2.675 GHz + 3,117,924,257 instructions # 2.11 insn per cycle + 0.552738607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.660507e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.964576e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.964576e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.375829 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.456247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.706124e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.706124e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.496887 sec INFO: No Floating Point Exceptions have been reported - 1,250,747,878 cycles # 3.316 GHz - 2,974,193,052 instructions # 2.38 insn per cycle - 0.377484944 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2721) (512y: 84) (512z: 0) + 1,340,057,166 cycles # 2.677 GHz + 2,978,732,248 instructions # 2.22 insn per cycle + 0.501058940 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.348464e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.748420e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.748420e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.330689 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.241283e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347840e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347840e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.757118 sec INFO: No Floating Point Exceptions have been reported - 1,100,885,696 cycles # 3.315 GHz - 1,916,709,402 instructions # 1.74 insn per cycle - 0.332395868 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1341) (512y: 84) (512z: 2171) + 1,329,966,748 cycles # 1.749 GHz + 1,951,787,640 instructions # 1.47 insn per cycle + 0.761356492 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492595 Relative difference = 5.286901344678233e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index a36100034f..88eaa7d80d 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:59:46 +DATE: 2024-05-16_14:43:15 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.867335e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.223690e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.343650e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.450195 sec +INFO: No Floating Point Exceptions have been reported + 1,886,543,936 cycles # 2.814 GHz + 2,627,629,254 instructions # 1.39 insn per cycle + 0.729554150 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.183442e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.842494e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962990e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 +TOTAL : 0.482910 sec +INFO: No Floating Point Exceptions have been reported + 1,994,696,147 cycles # 2.812 GHz + 2,828,466,882 instructions # 1.42 insn per cycle + 0.766894337 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.485753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.520480e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.520480e+05 ) sec^-1 -MeanMatrixElemValue = ( 
1.945528e+02 +- 1.186199e+02 ) GeV^-2 -TOTAL : 1.126850 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.069532e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.093791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.093791e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.553490 sec INFO: No Floating Point Exceptions have been reported - 3,940,537,374 cycles # 3.493 GHz - 13,031,883,273 instructions # 3.31 insn per cycle - 1.128447112 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 748) (avx2: 0) (512y: 0) (512z: 0) + 4,455,366,971 cycles # 2.862 GHz + 13,047,769,817 instructions # 2.93 insn per cycle + 1.557630020 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246857540270419 Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.870722e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.117280e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.117280e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945527e+02 +- 1.186198e+02 ) GeV^-2 -TOTAL : 
0.446176 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.869084e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.052765e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.052765e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.592151 sec INFO: No Floating Point Exceptions have been reported - 1,563,614,285 cycles # 3.494 GHz - 4,506,162,514 instructions # 2.88 insn per cycle - 0.447718655 seconds time elapsed + 1,701,146,602 cycles # 2.856 GHz + 4,512,165,265 instructions # 2.65 insn per cycle + 0.596248693 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246859631675157 Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.914888e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.990599e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.990599e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 0.229557 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.609679e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.315056e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.315056e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.313413 sec INFO: No Floating Point Exceptions have been reported - 766,484,742 cycles # 3.320 GHz - 1,884,106,082 instructions # 2.46 insn per cycle - 0.231166023 seconds time elapsed + 850,737,642 cycles # 2.684 GHz + 1,895,945,890 instructions # 2.23 insn per cycle + 0.317546154 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.880271e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.870021e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.870021e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 
0.231018 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.973396e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.785303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.785303e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.295859 sec INFO: No Floating Point Exceptions have been reported - 770,973,435 cycles # 3.319 GHz - 1,810,844,093 instructions # 2.35 insn per cycle - 0.232626961 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3390) (512y: 2) (512z: 0) + 801,819,935 cycles # 2.679 GHz + 1,819,229,849 instructions # 2.27 insn per cycle + 0.299944027 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.007666e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.178962e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.178962e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 
0.185655 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.354956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.770974e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.770974e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.400076 sec INFO: No Floating Point Exceptions have been reported - 619,563,101 cycles # 3.313 GHz - 1,280,713,055 instructions # 2.07 insn per cycle - 0.187266622 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1976) (512y: 8) (512z: 2382) + 733,009,701 cycles # 1.817 GHz + 1,304,250,799 instructions # 1.78 insn per cycle + 0.404216975 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489383243206 Relative difference = 4.32888033512879e-08 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 3975933cab..b62a8a0309 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,188 +1,246 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_15:13:30 +DATE: 2024-05-16_15:02:40 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.337579e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.030007e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.030007e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 +TOTAL : 0.462122 sec +INFO: No Floating Point Exceptions have been reported + 1,901,719,201 cycles # 2.816 GHz + 2,811,032,752 instructions # 1.48 insn per cycle + 0.731978994 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.907303e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.566216e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.566216e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 +TOTAL : 0.637098 sec +INFO: No Floating Point Exceptions have been reported + 2,459,040,544 cycles # 2.824 GHz + 3,715,271,980 instructions # 1.51 insn per cycle + 0.927773682 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.486105e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.520771e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.520771e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 -TOTAL : 1.128596 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.068152e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092782e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092782e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.559403 sec INFO: No Floating Point Exceptions have been reported - 3,949,924,947 cycles # 3.495 GHz - 13,036,365,567 instructions # 3.30 insn per cycle - 1.130311976 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 748) (avx2: 0) (512y: 0) (512z: 0) + 4,475,912,555 cycles # 2.864 GHz + 13,052,235,712 instructions # 2.92 insn per cycle + 1.563691095 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246857540270419 Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.876468e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.123023e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.123023e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945527e+02 +- 1.186198e+02 ) GeV^-2 -TOTAL : 0.448714 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.856394e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.039884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.039884e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.599599 sec INFO: No Floating Point Exceptions have been reported - 1,571,333,248 cycles # 3.493 GHz - 4,554,717,145 instructions # 2.90 insn per cycle - 0.450463633 seconds time elapsed + 1,723,185,860 cycles # 2.856 GHz + 4,560,285,596 instructions # 2.65 insn per cycle + 0.603925442 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246859631675157 Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.868753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.932885e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.932885e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 0.233735 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.545801e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.241062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.241062e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.321141 sec INFO: No Floating Point Exceptions have been reported - 781,080,214 cycles # 3.320 GHz - 1,921,555,601 instructions # 2.46 insn per cycle - 0.235573975 seconds time elapsed + 871,513,310 cycles # 2.683 GHz + 1,932,959,243 instructions # 2.22 insn per cycle + 0.325378385 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.150771e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.214813e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.214813e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 0.227020 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.891127e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.696072e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.696072e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.304221 sec INFO: No Floating Point Exceptions have been reported - 758,893,869 cycles # 3.321 GHz - 1,847,792,857 instructions # 2.43 insn per cycle - 0.228761763 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3390) (512y: 2) (512z: 0) + 825,995,486 cycles # 2.683 GHz + 1,856,161,781 instructions # 2.25 insn per cycle + 0.308416114 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.952242e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166037e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.166037e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 0.191095 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.307702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.720545e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.720545e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.408443 sec INFO: No Floating Point Exceptions have been reported - 638,761,005 cycles # 3.316 GHz - 1,322,785,295 instructions # 2.07 insn per cycle - 0.192912452 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1976) (512y: 8) (512z: 2382) + 755,445,387 cycles # 1.833 GHz + 1,345,989,570 instructions # 1.78 insn per cycle + 0.412779323 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489383243206 Relative difference = 4.32888033512879e-08 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index c30289d39f..f782cd39a5 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:59:50 +DATE: 2024-05-16_14:43:27 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.882997e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.225822e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.344729e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.447687 sec +INFO: No Floating Point Exceptions have been reported + 1,891,564,072 cycles # 2.816 GHz + 2,660,739,786 instructions # 1.41 insn per cycle + 0.729746219 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.107850e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.805980e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.921999e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 +TOTAL : 0.485790 sec +INFO: No Floating Point Exceptions have been reported + 1,996,906,378 cycles # 2.807 GHz + 2,867,667,096 instructions # 1.44 insn per cycle + 0.769333150 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.494286e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.529263e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.529263e+05 ) sec^-1 -MeanMatrixElemValue = ( 
1.945528e+02 +- 1.186199e+02 ) GeV^-2 -TOTAL : 1.120381 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.069812e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.094168e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.094168e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.553096 sec INFO: No Floating Point Exceptions have been reported - 3,920,701,706 cycles # 3.495 GHz - 13,012,702,962 instructions # 3.32 insn per cycle - 1.121977841 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) + 4,454,505,799 cycles # 2.862 GHz + 13,029,391,838 instructions # 2.92 insn per cycle + 1.557292510 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246857540270419 Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.864338e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.109744e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.109744e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945527e+02 +- 1.186198e+02 ) GeV^-2 -TOTAL : 
0.446774 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.876347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.060596e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.060596e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.590255 sec INFO: No Floating Point Exceptions have been reported - 1,565,647,947 cycles # 3.494 GHz - 4,502,366,916 instructions # 2.88 insn per cycle - 0.448316706 seconds time elapsed + 1,693,495,983 cycles # 2.852 GHz + 4,508,141,451 instructions # 2.66 insn per cycle + 0.594398488 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3588) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246859631675157 Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.941053e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.024320e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.024320e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 0.228791 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.574680e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.273652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.273652e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.315080 sec INFO: No Floating Point Exceptions have been reported - 763,896,479 cycles # 3.320 GHz - 1,881,145,730 instructions # 2.46 insn per cycle - 0.230398503 seconds time elapsed + 851,359,645 cycles # 2.672 GHz + 1,893,112,803 instructions # 2.22 insn per cycle + 0.319204462 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.226785e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.314429e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.314429e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 
0.222236 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.978403e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.785893e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.785893e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.295093 sec INFO: No Floating Point Exceptions have been reported - 742,272,381 cycles # 3.320 GHz - 1,806,863,379 instructions # 2.43 insn per cycle - 0.223884202 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3353) (512y: 2) (512z: 0) + 799,712,323 cycles # 2.678 GHz + 1,814,979,638 instructions # 2.27 insn per cycle + 0.299228201 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489318272599 Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.007631e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.177487e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.177487e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.947128e+02 +- 1.186880e+02 ) GeV^-2 -TOTAL : 
0.185723 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.317992e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.737735e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.737735e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.402763 sec INFO: No Floating Point Exceptions have been reported - 619,200,970 cycles # 3.309 GHz - 1,278,602,814 instructions # 2.06 insn per cycle - 0.187418956 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1939) (512y: 8) (512z: 2382) + 736,511,578 cycles # 1.812 GHz + 1,302,115,541 instructions # 1.77 insn per cycle + 0.406867415 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1936) (512y: 32) (512z: 2382) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247489383243206 Relative difference = 4.32888033512879e-08 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 3a946024f8..265a4a7626 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:59:53 +DATE: 2024-05-16_14:43:39 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.940149e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.336219e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.662963e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.452840 sec +INFO: No Floating Point Exceptions have been reported + 1,880,363,198 cycles # 2.808 GHz + 2,677,692,820 instructions # 1.42 insn per cycle + 0.726161506 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.684159e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.236315e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.588311e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.530728 sec +INFO: No Floating Point Exceptions have been reported + 2,164,642,485 cycles # 2.821 GHz + 3,145,530,012 instructions # 1.45 insn per cycle + 0.824333778 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.382158e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.410400e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.410400e+05 ) sec^-1 -MeanMatrixElemValue = ( 
1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.210878 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.003476e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.024445e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.024445e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.655769 sec INFO: No Floating Point Exceptions have been reported - 4,238,290,901 cycles # 3.496 GHz - 13,408,111,909 instructions # 3.16 insn per cycle - 1.212529715 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 836) (avx2: 0) (512y: 0) (512z: 0) + 4,745,491,139 cycles # 2.860 GHz + 13,466,039,366 instructions # 2.84 insn per cycle + 1.659848552 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.561388e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.658011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.658011e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.664044 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.849332e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.920343e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.920343e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.908133 sec INFO: No Floating Point Exceptions have been reported - 2,325,909,827 cycles # 3.495 GHz - 7,374,349,981 instructions # 3.17 insn per cycle - 0.665822359 seconds time elapsed + 2,605,721,632 cycles # 2.858 GHz + 7,384,650,569 instructions # 2.83 insn per cycle + 0.912227813 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.607029e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.900618e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.900618e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.379426 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.133010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.340359e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340359e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.546033 sec INFO: No Floating Point Exceptions have been reported - 1,262,584,228 cycles # 3.316 GHz - 3,036,424,627 instructions # 2.40 insn per cycle - 0.381094498 seconds time elapsed + 1,469,888,298 cycles # 2.674 GHz + 3,055,461,884 instructions # 2.08 insn per cycle + 0.550169150 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.781169e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.102295e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.102295e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.367326 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.544324e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.807645e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.807645e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.485213 sec INFO: No Floating Point Exceptions have been reported - 1,222,602,925 cycles # 3.315 GHz - 2,924,042,140 instructions # 2.39 insn per cycle - 0.368965475 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2850) (512y: 90) (512z: 0) + 1,307,959,720 cycles # 2.676 GHz + 2,930,377,532 instructions # 2.24 insn per cycle + 0.489382978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.115607e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.478719e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.478719e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.344683 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.172350e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.272043e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.272043e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.779592 sec INFO: No Floating Point Exceptions have been reported - 1,147,285,653 cycles # 3.314 GHz - 1,931,282,298 instructions # 1.68 insn per cycle - 0.346427078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1693) (512y: 92) (512z: 2169) + 1,368,592,699 cycles # 1.747 GHz + 1,969,378,714 instructions # 1.44 insn per cycle + 0.783958712 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 095b604bbf..84e80111cc 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:59:58 +DATE: 2024-05-16_14:43:53 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.890956e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.181054e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.513059e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.453973 sec +INFO: No Floating Point Exceptions have been reported + 1,876,167,670 cycles # 2.808 GHz + 2,662,885,558 instructions # 1.42 insn per cycle + 0.726739496 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.642147e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.081360e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.416296e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.533908 sec +INFO: No Floating Point Exceptions have been reported + 2,163,893,097 cycles # 2.818 GHz + 3,132,561,280 instructions # 1.45 insn per cycle + 0.826852700 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.382537e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.410702e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.410702e+05 ) sec^-1 -MeanMatrixElemValue = ( 
1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.210256 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.007176e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.028375e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.028375e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.650928 sec INFO: No Floating Point Exceptions have been reported - 4,238,104,879 cycles # 3.498 GHz - 13,396,938,147 instructions # 3.16 insn per cycle - 1.211923293 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 826) (avx2: 0) (512y: 0) (512z: 0) + 4,733,031,285 cycles # 2.861 GHz + 13,451,191,160 instructions # 2.84 insn per cycle + 1.655053441 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.560416e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.657071e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.657071e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.664319 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.847760e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.919370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.919370e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.909156 sec INFO: No Floating Point Exceptions have been reported - 2,325,818,465 cycles # 3.494 GHz - 7,370,246,475 instructions # 3.17 insn per cycle - 0.666036769 seconds time elapsed + 2,606,818,939 cycles # 2.857 GHz + 7,388,977,556 instructions # 2.83 insn per cycle + 0.913243210 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.610639e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.904142e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.904142e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.378982 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.915489e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.093943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.093943e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.585235 sec INFO: No Floating Point Exceptions have been reported - 1,261,007,643 cycles # 3.316 GHz - 3,032,344,059 instructions # 2.40 insn per cycle - 0.380605351 seconds time elapsed + 1,469,957,671 cycles # 2.496 GHz + 3,055,084,256 instructions # 2.08 insn per cycle + 0.589443028 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.784483e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.105789e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.105789e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.366888 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.535422e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.797003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.797003e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.486063 sec INFO: No Floating Point Exceptions have been reported - 1,221,624,837 cycles # 3.317 GHz - 2,920,392,755 instructions # 2.39 insn per cycle - 0.368559603 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2826) (512y: 90) (512z: 0) + 1,306,700,125 cycles # 2.669 GHz + 2,930,583,524 instructions # 2.24 insn per cycle + 0.490171496 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.106556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.468206e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.468206e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 
0.345213 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.173668e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.273111e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.273111e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.778991 sec INFO: No Floating Point Exceptions have been reported - 1,146,891,573 cycles # 3.308 GHz - 1,929,354,616 instructions # 1.68 insn per cycle - 0.346978661 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1669) (512y: 92) (512z: 2169) + 1,367,910,665 cycles # 1.749 GHz + 1,969,371,455 instructions # 1.44 insn per cycle + 0.783143035 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index f2b066352d..8af6873425 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:27:03 +DATE: 2024-05-16_15:20:33 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.588343e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.081541e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.176224e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.528808 sec +INFO: No Floating Point Exceptions have been reported + 2,192,111,166 cycles # 2.821 GHz + 3,135,008,318 instructions # 1.43 insn per cycle + 0.833908791 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 
2.552827e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.615371e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.615371e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.237983 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.865233e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.915227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.915227e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 5.734356 sec INFO: No Floating Point Exceptions have been reported - 14,834,141,883 cycles # 3.498 GHz - 42,358,662,682 instructions # 2.86 insn per cycle - 4.240692019 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 715) (avx2: 0) (512y: 0) (512z: 0) + 16,430,057,220 cycles # 2.863 GHz + 42,484,854,801 instructions # 2.59 insn per cycle + 5.739849036 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 711) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.429207e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.635680e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.635680e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
2.495408 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.235376e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.401567e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.401567e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.353450 sec INFO: No Floating Point Exceptions have been reported - 8,733,845,375 cycles # 3.496 GHz - 26,311,106,876 instructions # 3.01 insn per cycle - 2.498254604 seconds time elapsed + 9,612,345,009 cycles # 2.863 GHz + 26,317,248,003 instructions # 2.74 insn per cycle + 3.358813940 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2388) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.512447e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.100995e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.100995e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
1.524145 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.244474e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.678972e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.678972e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.111219 sec INFO: No Floating Point Exceptions have been reported - 5,082,843,549 cycles # 3.329 GHz - 12,004,216,763 instructions # 2.36 insn per cycle - 1.527047583 seconds time elapsed + 5,673,148,574 cycles # 2.682 GHz + 12,029,125,150 instructions # 2.12 insn per cycle + 2.116589548 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2532) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.691238e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.313521e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.313521e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
1.488661 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.759844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.282682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.282682e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 1.934603 sec INFO: No Floating Point Exceptions have been reported - 4,964,913,708 cycles # 3.329 GHz - 11,230,782,048 instructions # 2.26 insn per cycle - 1.491517590 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2246) (512y: 128) (512z: 0) + 5,185,525,755 cycles # 2.675 GHz + 11,158,849,555 instructions # 2.15 insn per cycle + 1.940086470 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2195) (512y: 148) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.995948e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.646934e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.646934e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
1.436277 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.492671e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.676216e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.676216e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.114794 sec INFO: No Floating Point Exceptions have been reported - 4,789,679,060 cycles # 3.329 GHz - 7,949,916,272 instructions # 1.66 insn per cycle - 1.439141121 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1464) (512y: 107) (512z: 1682) + 5,530,850,143 cycles # 1.773 GHz + 8,071,834,418 instructions # 1.46 insn per cycle + 3.120392658 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1471) (512y: 129) (512z: 1684) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index d2f36f71b3..746b04ecac 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:27:16 +DATE: 2024-05-16_15:20:58 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.594523e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092654e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188255e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.533303 sec +INFO: No Floating Point Exceptions have been reported + 2,159,610,833 cycles # 2.816 GHz + 3,095,961,302 instructions # 1.43 insn per cycle + 0.825364511 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 
2.535565e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.597334e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.597334e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.265484 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.884407e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.935333e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935333e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 5.676327 sec INFO: No Floating Point Exceptions have been reported - 14,925,074,351 cycles # 3.497 GHz - 43,040,455,238 instructions # 2.88 insn per cycle - 4.268333311 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 661) (avx2: 0) (512y: 0) (512z: 0) + 16,262,813,557 cycles # 2.863 GHz + 43,266,807,177 instructions # 2.66 insn per cycle + 5.681729392 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.582132e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.803494e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.803494e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
2.416903 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.290556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.463505e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.463505e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.298765 sec INFO: No Floating Point Exceptions have been reported - 8,463,102,793 cycles # 3.498 GHz - 25,423,102,317 instructions # 3.00 insn per cycle - 2.419752177 seconds time elapsed + 9,454,937,516 cycles # 2.862 GHz + 25,430,832,847 instructions # 2.69 insn per cycle + 3.304226277 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2268) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.828940e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.313322e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.313322e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
1.664399 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.695348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.042916e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.042916e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.345615 sec INFO: No Floating Point Exceptions have been reported - 5,543,617,863 cycles # 3.326 GHz - 13,614,779,414 instructions # 2.46 insn per cycle - 1.667241581 seconds time elapsed + 6,296,882,273 cycles # 2.679 GHz + 13,638,682,807 instructions # 2.17 insn per cycle + 2.351107442 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2629) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.990714e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.496407e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.496407e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
1.626291 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.910957e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.286382e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.286382e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.246822 sec INFO: No Floating Point Exceptions have been reported - 5,418,315,687 cycles # 3.327 GHz - 12,664,280,464 instructions # 2.34 insn per cycle - 1.629124739 seconds time elapsed + 6,026,491,701 cycles # 2.677 GHz + 12,722,860,113 instructions # 2.11 insn per cycle + 2.252413644 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2146) (512y: 296) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.106458e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.780667e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.780667e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
1.418617 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.420299e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.596534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.596534e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.177504 sec INFO: No Floating Point Exceptions have been reported - 4,731,424,271 cycles # 3.329 GHz - 8,833,879,203 instructions # 1.87 insn per cycle - 1.421431673 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1353) (512y: 160) (512z: 1776) + 5,627,100,070 cycles # 1.769 GHz + 8,928,441,764 instructions # 1.59 insn per cycle + 3.183062200 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1357) (512y: 171) (512z: 1777) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 64cd5aa370..a9079e9716 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,71 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:27:28 +DATE: 2024-05-16_15:21:23 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.566221e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.504693e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.775023e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.485925 sec +INFO: No Floating Point Exceptions have been reported + 2,003,287,538 cycles # 2.816 GHz + 2,880,414,118 instructions # 1.44 insn per cycle + 0.769648039 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695463908836 +Relative difference = 4.162439020000051e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 
2.703429e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.782201e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.782201e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 3.995435 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.938364e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.994818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.994818e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 5.499884 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 13,985,040,470 cycles # 3.498 GHz - 42,109,829,772 instructions # 3.01 insn per cycle - 3.998021323 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 604) (avx2: 0) (512y: 0) (512z: 0) + 15,743,516,639 cycles # 2.861 GHz + 42,225,863,593 instructions # 2.68 insn per cycle + 5.505101290 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 @@ -73,29 +104,29 @@ Avg ME (F77/C++) = 4.3135739049175754 Relative difference = 2.2042608890083832e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.001795e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.414096e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.414096e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315913e+00 +- 1.953828e-01 ) GeV^0 -TOTAL : 1.861772 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.494085e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.834702e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.834702e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.423560 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,514,230,564 cycles # 3.495 GHz - 16,926,292,213 instructions # 2.60 insn per cycle - 1.864397069 seconds time elapsed + 6,948,197,620 cycles # 2.861 GHz + 16,919,710,710 instructions # 2.44 insn per cycle + 2.428887408 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2983) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 @@ -103,29 +134,29 @@ Avg ME (F77/C++) = 4.3135722205042839 Relative difference = 5.111872113533787e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.100363e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.229661e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.229661e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.062731 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.820914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.816967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.816967e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.429543 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,552,382,042 cycles # 3.335 GHz - 7,970,082,680 instructions # 2.24 insn per cycle - 1.065333882 seconds time elapsed + 3,855,960,900 cycles # 2.689 GHz + 7,989,689,028 instructions # 2.07 insn per cycle + 1.434693752 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -133,29 +164,29 @@ Avg ME (F77/C++) = 4.3135645699221641 Relative difference = 9.97035713074993e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.126084e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.261376e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.261376e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.042775 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.282128e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.407558e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.407558e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.355217 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,486,677,745 cycles # 3.336 GHz - 7,511,921,838 instructions # 2.15 insn per cycle - 1.045365922 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3091) (512y: 3) (512z: 0) + 3,662,603,190 cycles # 2.693 GHz + 7,491,885,625 instructions # 2.05 insn per cycle + 1.360533114 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3036) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -163,29 +194,29 @@ Avg ME (F77/C++) = 4.3135645699221641 Relative difference = 9.97035713074993e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.331577e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.530592e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.530592e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289196e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 0.899900 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.072932e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.653576e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.653576e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.816585 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,008,871,631 cycles # 3.335 GHz - 5,914,260,675 instructions # 1.97 insn per cycle - 0.902557879 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2421) (512y: 8) (512z: 2031) + 3,322,287,385 cycles # 1.825 GHz + 5,988,754,595 instructions # 1.80 insn per cycle + 1.821834164 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2418) (512y: 32) (512z: 2031) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 7b8532ad59..0359df7b77 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,71 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:27:39 +DATE: 2024-05-16_15:21:44 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.575897e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.505600e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.778243e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.488373 sec +INFO: No Floating Point Exceptions have been reported + 2,007,752,645 cycles # 2.812 GHz + 2,828,437,251 instructions # 1.41 insn per cycle + 0.772837040 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695463908836 +Relative difference = 4.162439020000051e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 
2.800498e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.885498e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.885498e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 3.860758 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.991117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.050649e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.050649e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 5.356246 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 13,507,580,954 cycles # 3.497 GHz - 42,358,644,316 instructions # 3.14 insn per cycle - 3.863318185 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 560) (avx2: 0) (512y: 0) (512z: 0) + 15,339,535,429 cycles # 2.862 GHz + 42,474,905,629 instructions # 2.77 insn per cycle + 5.361339903 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 559) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 @@ -73,29 +104,29 @@ Avg ME (F77/C++) = 4.3135739491553977 Relative difference = 1.1787117204016727e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.356612e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.985601e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.985601e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315913e+00 +- 1.953828e-01 ) GeV^0 -TOTAL : 1.540920 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.134209e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.583662e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.583662e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.132369 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,396,516,030 cycles # 3.497 GHz - 16,268,270,688 instructions # 3.01 insn per cycle - 1.543527679 seconds time elapsed + 6,119,263,046 cycles # 2.864 GHz + 16,261,701,502 instructions # 2.66 insn per cycle + 2.137647028 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2702) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 @@ -103,29 +134,29 @@ Avg ME (F77/C++) = 4.3135722205042839 Relative difference = 5.111872113533787e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.346896e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.027143e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.027143e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.232243 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.498649e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.173623e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.173623e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.703269 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,112,861,248 cycles # 3.331 GHz - 9,021,964,660 instructions # 2.19 insn per cycle - 1.234913176 seconds time elapsed + 4,581,699,390 cycles # 2.683 GHz + 9,041,394,873 instructions # 1.97 insn per cycle + 1.708700782 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3558) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -133,29 +164,29 @@ Avg ME (F77/C++) = 4.3135645687580109 Relative difference = 9.997345323075056e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.478929e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.042114e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.042114e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289195e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.217923 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.705142e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.424759e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.424759e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.652652 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,064,285,992 cycles # 3.331 GHz - 8,513,710,724 instructions # 2.09 insn per cycle - 1.220504425 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3337) (512y: 0) (512z: 0) + 4,411,023,052 cycles # 2.662 GHz + 8,532,140,610 instructions # 1.93 insn per cycle + 1.658018216 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3311) (512y: 10) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -163,29 +194,29 @@ Avg ME (F77/C++) = 4.3135645687580109 Relative difference = 9.997345323075056e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.319130e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.517145e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.517145e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289196e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 0.906747 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.118773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.709641e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.709641e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.803301 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,034,187,177 cycles # 3.338 GHz - 5,883,924,547 instructions # 1.94 insn per cycle - 0.909399222 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 8) (512z: 2014) + 3,302,699,675 cycles # 1.827 GHz + 5,958,419,273 instructions # 1.80 insn per cycle + 1.808538430 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 32) (512z: 2014) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 06fcaedc6d..4345b3c851 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,71 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:27:49 +DATE: 2024-05-16_15:22:04 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.596790e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.087710e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.182609e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.531791 sec +INFO: No Floating Point Exceptions have been reported + 2,158,479,665 cycles # 2.816 GHz + 3,115,947,911 instructions # 1.44 insn per cycle + 0.824595914 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 +Avg ME (F77/GPU) = 4.3134711012809239 +Relative difference = 2.0835166567625394e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 
2.375607e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.429535e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.429535e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.543809 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.739846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.783362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.783362e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.139062 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 15,901,263,136 cycles # 3.498 GHz - 41,771,711,123 instructions # 2.63 insn per cycle - 4.546568218 seconds time elapsed + 17,579,172,412 cycles # 2.862 GHz + 41,767,715,738 instructions # 2.38 insn per cycle + 6.144566394 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -73,29 +104,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.231502e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.418747e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.418747e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.606220 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.944235e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.080846e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.080846e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.674671 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,126,872,302 cycles # 3.499 GHz - 26,345,829,381 instructions # 2.89 insn per cycle - 2.609067827 seconds time elapsed + 10,157,870,701 cycles # 2.761 GHz + 26,355,211,403 instructions # 2.59 insn per cycle + 3.680088821 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2438) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -103,29 +134,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.477118e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.909959e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.909959e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.747382 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.512494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.830362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.830362e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.435332 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,818,706,514 cycles # 3.325 GHz - 12,039,889,556 instructions # 2.07 insn per cycle - 1.750186275 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2708) (512y: 0) (512z: 0) + 6,512,604,303 cycles # 2.669 GHz + 12,120,159,732 instructions # 1.86 insn per cycle + 2.440902409 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2718) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -133,29 +164,29 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.659622e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.115827e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.115827e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.699288 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.920988e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.300442e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.300442e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.244169 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,659,668,657 cycles # 3.326 GHz - 11,266,150,406 instructions # 1.99 insn per cycle - 1.702085699 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2425) (512y: 130) (512z: 0) + 6,018,583,564 cycles # 2.676 GHz + 11,228,279,694 instructions # 1.87 insn per cycle + 2.249711111 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2369) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -163,29 +194,29 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.230568e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.765472e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.765472e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.575086 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.148571e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.297302e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.297302e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.442171 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,245,552,991 cycles # 3.325 GHz - 8,052,851,902 instructions # 1.54 insn per cycle - 1.577873671 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1780) (512y: 112) (512z: 1753) + 6,072,730,798 cycles # 1.762 GHz + 8,215,005,190 instructions # 1.35 insn per cycle + 3.447734816 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1787) (512y: 134) (512z: 1755) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index 480631cabb..fc67fec042 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,71 +1,102 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:28:02 +DATE: 2024-05-16_15:22:31 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.615689e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.096145e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.193163e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.527662 sec +INFO: No Floating Point Exceptions have been reported + 2,187,091,067 cycles # 2.822 GHz + 3,143,599,790 instructions # 1.44 insn per cycle + 0.831715891 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 +Avg ME (F77/GPU) = 4.3134711012809239 +Relative difference = 2.0835166567625394e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 
2.347178e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.400187e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.400187e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.596559 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.750132e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.794255e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.794255e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.103500 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 16,087,359,617 cycles # 3.498 GHz - 43,040,598,944 instructions # 2.68 insn per cycle - 4.599289395 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 662) (avx2: 0) (512y: 0) (512z: 0) + 17,473,867,626 cycles # 2.861 GHz + 43,052,630,037 instructions # 2.46 insn per cycle + 6.108967949 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 651) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -73,29 +104,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.411626e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.615612e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.615612e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.505649 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.176372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.336517e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.336517e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.414423 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 8,772,065,424 cycles # 3.497 GHz - 25,160,170,266 instructions # 2.87 insn per cycle - 2.508503600 seconds time elapsed + 9,783,940,024 cycles # 2.862 GHz + 25,167,910,576 instructions # 2.57 insn per cycle + 3.420037518 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -103,29 +134,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.114431e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.500436e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.500436e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.844369 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.178030e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.451835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.451835e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.622185 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,139,158,659 cycles # 3.324 GHz - 12,731,086,383 instructions # 2.07 insn per cycle - 1.847184447 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2709) (512y: 0) (512z: 0) + 7,019,924,583 cycles # 2.672 GHz + 12,790,606,448 instructions # 1.82 insn per cycle + 2.627804246 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2699) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -133,29 +164,29 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.276022e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.678921e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.678921e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.796889 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.488078e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.801083e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.801083e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.447720 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,981,917,485 cycles # 3.324 GHz - 12,063,062,925 instructions # 2.02 insn per cycle - 1.799731034 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2379) (512y: 217) (512z: 0) + 6,546,937,322 cycles # 2.670 GHz + 12,109,881,739 instructions # 1.85 insn per cycle + 2.453162643 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2351) (512y: 227) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -163,29 +194,29 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.011666e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.510292e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.510292e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.621074 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.983756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.117708e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.117708e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.627336 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,396,377,266 cycles # 3.324 GHz - 8,870,172,709 instructions # 1.64 insn per cycle - 1.623924011 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 157) (512z: 2081) + 6,377,996,877 cycles # 1.756 GHz + 8,984,744,450 instructions # 1.41 insn per cycle + 3.632964633 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1892) (512y: 178) (512z: 2083) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 6dfec42160..f2a95b68c4 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:26:51 +DATE: 2024-05-16_15:19:32 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.205899e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.229515e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.233614e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.467479 sec +INFO: No Floating Point Exceptions have been reported + 1,929,394,895 cycles # 2.809 GHz + 2,774,653,842 instructions # 1.44 insn per cycle + 0.745241861 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.854750e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.994181e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.003911e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.485365 sec +INFO: No Floating Point Exceptions have been reported + 1,990,830,698 cycles # 2.816 GHz + 2,942,277,354 instructions # 1.48 insn per cycle + 0.765598417 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.523683e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.527922e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.527922e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.119146 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.339413e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.342602e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.342602e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.163415 sec INFO: No Floating Point Exceptions have been reported - 418,376,951 cycles # 3.478 GHz - 1,390,302,519 instructions # 3.32 insn per cycle - 0.120575892 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3971) (avx2: 0) (512y: 0) (512z: 0) + 474,956,853 cycles # 2.847 GHz + 1,396,923,375 instructions # 2.94 insn per cycle + 0.167372542 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3991) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.956577e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.973214e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
8.973214e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.061242 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.350685e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.362490e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.362490e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.088282 sec INFO: No Floating Point Exceptions have been reported - 212,720,705 cycles # 3.409 GHz - 693,825,670 instructions # 3.26 insn per cycle - 0.062675749 seconds time elapsed + 246,129,842 cycles # 2.680 GHz + 699,160,574 instructions # 2.84 insn per cycle + 0.092454839 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9501) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.011044e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.018814e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.018814e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.028471 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.421076e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.426847e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.426847e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.042262 sec INFO: No Floating Point Exceptions have been reported - 98,055,757 cycles # 3.306 GHz - 254,719,428 instructions # 2.60 insn per cycle - 0.029885593 seconds time elapsed + 120,513,094 cycles # 2.641 GHz + 260,079,134 instructions # 2.16 insn per cycle + 0.046206481 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8227) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.229563e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.239103e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.239103e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.025969 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.614262e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.622122e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.622122e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037855 sec INFO: No Floating Point Exceptions have been reported - 89,725,190 cycles # 3.307 GHz - 235,372,197 instructions # 2.62 insn per cycle - 0.027374685 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7399) (512y: 130) (512z: 0) + 109,022,775 cycles # 2.645 GHz + 240,308,972 instructions # 2.20 insn per cycle + 0.041904895 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7348) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.319557e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.341463e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.341463e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.018202 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.170349e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175260e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.175260e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.050252 sec INFO: No Floating Point Exceptions have been reported - 62,033,907 cycles # 3.208 GHz - 132,765,845 instructions # 2.14 insn per cycle - 0.019577536 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1685) (512y: 104) (512z: 6590) + 96,595,554 cycles # 1.802 GHz + 138,452,128 instructions # 1.43 insn per cycle + 0.054148545 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1692) (512y: 126) (512z: 6592) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index 485330b742..ca894b0a6d 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:26:53 +DATE: 2024-05-16_15:19:42 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.237277e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.263102e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.267367e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.467317 sec +INFO: No Floating Point Exceptions have been reported + 1,933,877,717 cycles # 2.813 GHz + 2,829,779,417 instructions # 1.46 insn per cycle + 0.746133258 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.945887e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.087010e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.096853e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.483889 sec +INFO: No Floating Point Exceptions have been reported + 2,005,783,112 cycles # 2.816 GHz + 2,927,359,248 instructions # 1.46 insn per cycle + 0.768925329 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.504525e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.508775e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.508775e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.119069 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.344408e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.347652e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.347652e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.162339 sec INFO: No Floating Point Exceptions have been reported - 417,500,473 cycles # 3.472 GHz - 1,384,122,736 instructions # 3.32 insn per cycle - 0.120521598 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3879) (avx2: 0) (512y: 0) (512z: 0) + 471,806,818 cycles # 2.848 GHz + 1,391,948,601 instructions # 2.95 insn per cycle + 0.166295977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3869) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.869441e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.885004e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
8.885004e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.061247 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.367799e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.379601e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.379601e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.087176 sec INFO: No Floating Point Exceptions have been reported - 211,782,117 cycles # 3.393 GHz - 688,974,542 instructions # 3.25 insn per cycle - 0.062692430 seconds time elapsed + 243,999,829 cycles # 2.694 GHz + 695,186,413 instructions # 2.85 insn per cycle + 0.091139423 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9537) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.994571e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002133e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.002133e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.028171 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.395387e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.400899e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.400899e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.042187 sec INFO: No Floating Point Exceptions have been reported - 97,037,553 cycles # 3.306 GHz - 249,900,445 instructions # 2.58 insn per cycle - 0.029580809 seconds time elapsed + 119,801,052 cycles # 2.624 GHz + 255,741,591 instructions # 2.13 insn per cycle + 0.046174431 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8181) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.239951e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.249596e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.249596e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.025255 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.613988e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.621406e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621406e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037041 sec INFO: No Floating Point Exceptions have been reported - 87,483,215 cycles # 3.307 GHz - 230,588,362 instructions # 2.64 insn per cycle - 0.026694294 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7352) (512y: 130) (512z: 0) + 106,534,081 cycles # 2.639 GHz + 235,917,118 instructions # 2.21 insn per cycle + 0.041041363 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7301) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.208887e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.229075e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.229075e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.018222 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.167962e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.172897e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.172897e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.049548 sec INFO: No Floating Point Exceptions have been reported - 61,027,044 cycles # 3.148 GHz - 127,983,571 instructions # 2.10 insn per cycle - 0.019612963 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1634) (512y: 104) (512z: 6595) + 94,554,513 cycles # 1.786 GHz + 133,899,064 instructions # 1.42 insn per cycle + 0.053428613 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1641) (512y: 126) (512z: 6597) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 552a5148b4..f86e27869e 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:26:55 +DATE: 2024-05-16_15:19:53 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.541598e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.553658e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.556693e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 +TOTAL : 0.467629 sec +INFO: No Floating Point Exceptions have been reported + 1,964,166,954 cycles # 2.815 GHz + 2,823,406,286 instructions # 1.44 insn per cycle + 0.754117473 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.614317e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.731134e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.742615e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.020493e-03 +- 4.025604e-03 ) GeV^-4 +TOTAL : 0.468434 sec +INFO: No Floating Point Exceptions have been reported + 1,946,164,211 cycles # 2.817 GHz + 2,847,399,547 instructions # 1.46 insn per cycle + 0.748191861 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272870954487585E-006 +Relative difference = 4.564329725014175e-06 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.594807e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.599355e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.599355e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.117292 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.448019e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.451516e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.451516e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.158474 sec INFO: No Floating Point Exceptions have been reported - 411,053,000 cycles # 3.469 GHz - 1,388,257,678 instructions # 3.38 insn per cycle - 0.118765314 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) + 461,638,972 cycles # 2.852 GHz + 1,393,493,000 instructions # 3.02 insn per cycle + 0.162490485 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3070) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105211728276E-006 Relative difference = 5.891219330978222e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.711156e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.717425e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.717425e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.033054 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.201120e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.205395e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.205395e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.048902 sec INFO: No Floating Point Exceptions have been reported - 115,868,477 cycles # 3.385 GHz - 370,507,638 instructions # 3.20 insn per cycle - 0.034519034 seconds time elapsed + 138,099,810 cycles # 2.644 GHz + 375,723,801 instructions # 2.72 insn per cycle + 0.052805368 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:10134) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.972394e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.005793e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
4.005793e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.015435 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.699468e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.721720e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.721720e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.024500 sec INFO: No Floating Point Exceptions have been reported - 54,887,348 cycles # 3.297 GHz - 141,404,697 instructions # 2.58 insn per cycle - 0.016831180 seconds time elapsed + 72,431,086 cycles # 2.595 GHz + 146,734,646 instructions # 2.03 insn per cycle + 0.028413255 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8933) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.344012e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.385737e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
4.385737e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.014294 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.950281e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.979563e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.979563e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.023199 sec INFO: No Floating Point Exceptions have been reported - 51,391,104 cycles # 3.292 GHz - 131,448,490 instructions # 2.56 insn per cycle - 0.015740752 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8219) (512y: 8) (512z: 0) + 67,511,576 cycles # 2.517 GHz + 136,466,222 instructions # 2.02 insn per cycle + 0.027372188 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8164) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.373023e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.462095e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.462095e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.010501 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.260359e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.280493e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.280493e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 +TOTAL : 0.028479 sec INFO: No Floating Point Exceptions have been reported - 37,353,295 cycles # 3.150 GHz - 79,834,868 instructions # 2.14 insn per cycle - 0.012002265 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2575) (512y: 8) (512z: 6935) + 59,124,236 cycles # 1.860 GHz + 85,286,285 instructions # 1.44 insn per cycle + 0.032355670 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2572) (512y: 32) (512z: 6935) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 07a4a566fa..2af7dd76f9 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:26:57 +DATE: 2024-05-16_15:20:03 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.561126e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.572400e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.575387e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 +TOTAL : 0.469592 sec +INFO: No Floating Point Exceptions have been reported + 1,933,901,131 cycles # 2.816 GHz + 2,803,636,036 instructions # 1.45 insn per cycle + 0.744726293 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.901730e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.003706e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005157e+06 ) sec^-1 +MeanMatrixElemValue = ( 8.020495e-03 +- 4.025606e-03 ) GeV^-4 +TOTAL : 0.471774 sec +INFO: No Floating Point Exceptions have been reported + 1,934,886,385 cycles # 2.815 GHz + 2,830,776,229 instructions # 1.46 insn per cycle + 0.746474254 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272870252982758E-006 +Relative difference = 4.555698209723637e-06 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.609563e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.614104e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.614104e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.116376 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.452227e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.455705e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.455705e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.157329 sec INFO: No Floating Point Exceptions have been reported - 409,017,056 cycles # 3.478 GHz - 1,383,259,571 instructions # 3.38 insn per cycle - 0.117820263 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2946) (avx2: 0) (512y: 0) (512z: 0) + 458,573,657 cycles # 2.854 GHz + 1,388,574,447 instructions # 3.03 insn per cycle + 0.161242660 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105211728276E-006 Relative difference = 5.891219330978222e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.706065e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.712338e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.712338e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.032671 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.204538e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.208976e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.208976e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.047932 sec INFO: No Floating Point Exceptions have been reported - 114,247,893 cycles # 3.374 GHz - 365,724,809 instructions # 3.20 insn per cycle - 0.034093739 seconds time elapsed + 136,097,535 cycles # 2.652 GHz + 371,027,952 instructions # 2.73 insn per cycle + 0.051946079 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:10117) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.912469e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.944750e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.944750e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.015078 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.559391e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.580217e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.580217e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.024960 sec INFO: No Floating Point Exceptions have been reported - 53,850,273 cycles # 3.309 GHz - 136,539,794 instructions # 2.54 insn per cycle - 0.016472996 seconds time elapsed + 71,167,021 cycles # 2.517 GHz + 142,031,155 instructions # 2.00 insn per cycle + 0.028974311 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8887) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.348789e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.388103e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
4.388103e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275184e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.013707 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.102195e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.131341e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.131341e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.021142 sec INFO: No Floating Point Exceptions have been reported - 49,526,307 cycles # 3.303 GHz - 126,638,085 instructions # 2.56 insn per cycle - 0.015125513 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8172) (512y: 8) (512z: 0) + 63,906,261 cycles # 2.611 GHz + 131,729,034 instructions # 2.06 insn per cycle + 0.025029577 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8117) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.362894e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.452373e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.452373e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.010004 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.321655e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.342179e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.342179e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 +TOTAL : 0.027241 sec INFO: No Floating Point Exceptions have been reported - 35,465,401 cycles # 3.143 GHz - 74,977,185 instructions # 2.11 insn per cycle - 0.011421321 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2524) (512y: 8) (512z: 6939) + 57,621,926 cycles # 1.879 GHz + 80,488,160 instructions # 1.40 insn per cycle + 0.031258526 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2521) (512y: 32) (512z: 6939) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 0c1df1c6d0..16ac12981a 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:26:59 +DATE: 2024-05-16_15:20:13 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.172533e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.195464e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.199217e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.467059 sec +INFO: No Floating Point Exceptions have been reported + 1,929,783,722 cycles # 2.812 GHz + 2,830,067,082 instructions # 1.47 insn per cycle + 0.744348567 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.817494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.954472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.963776e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.485210 sec +INFO: No Floating Point Exceptions have been reported + 1,989,265,248 cycles # 2.816 GHz + 2,972,405,087 instructions # 1.49 insn per cycle + 0.764721680 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.480417e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.484604e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.484604e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.120324 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.312127e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.315249e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.315249e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.164695 sec INFO: No Floating Point Exceptions have been reported - 422,693,979 cycles # 3.479 GHz - 1,398,982,689 instructions # 3.31 insn per cycle - 0.121787632 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3983) (avx2: 0) (512y: 0) (512z: 0) + 479,517,658 cycles # 2.854 GHz + 1,405,303,424 instructions # 2.93 insn per cycle + 0.168655160 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3977) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.952459e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.974369e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
8.974369e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.061230 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.589174e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.601629e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.601629e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.085009 sec INFO: No Floating Point Exceptions have been reported - 211,904,125 cycles # 3.396 GHz - 685,639,297 instructions # 3.24 insn per cycle - 0.062689212 seconds time elapsed + 242,672,694 cycles # 2.748 GHz + 691,102,866 instructions # 2.85 insn per cycle + 0.088915527 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9324) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.020104e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.027829e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.027829e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.028377 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.402863e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.409241e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.409241e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.042757 sec INFO: No Floating Point Exceptions have been reported - 97,728,139 cycles # 3.306 GHz - 252,391,837 instructions # 2.58 insn per cycle - 0.029788449 seconds time elapsed + 119,836,607 cycles # 2.596 GHz + 257,882,084 instructions # 2.15 insn per cycle + 0.046733316 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.253977e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.263726e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.263726e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.025667 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.611690e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.620124e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.620124e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037856 sec INFO: No Floating Point Exceptions have been reported - 88,944,423 cycles # 3.307 GHz - 233,215,123 instructions # 2.62 insn per cycle - 0.027132704 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7393) (512y: 126) (512z: 0) + 108,462,768 cycles # 2.631 GHz + 238,127,423 instructions # 2.20 insn per cycle + 0.041890123 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7342) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.241277e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.262092e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.262092e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.018575 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.150674e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155466e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155466e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.051477 sec INFO: No Floating Point Exceptions have been reported - 62,726,022 cycles # 3.178 GHz - 133,561,563 instructions # 2.13 insn per cycle - 0.019979396 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1946) (512y: 100) (512z: 6321) + 99,538,839 cycles # 1.810 GHz + 139,339,349 instructions # 1.40 insn per cycle + 0.055665824 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1953) (512y: 122) (512z: 6323) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 3c13a3eafa..96180e8a09 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,183 +1,229 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:27:01 +DATE: 2024-05-16_15:20:23 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.207087e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.230616e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.234507e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.468179 sec +INFO: No Floating Point Exceptions have been reported + 1,938,727,271 cycles # 2.813 GHz + 2,835,562,501 instructions # 1.46 insn per cycle + 0.747262841 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.924846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.065621e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.075056e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.482793 sec +INFO: No Floating Point Exceptions have been reported + 2,011,507,022 cycles # 2.818 GHz + 2,962,288,052 instructions # 1.47 insn per cycle + 0.770325801 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.498320e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.502510e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.502510e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.119291 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.325014e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.328184e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328184e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.163699 sec INFO: No Floating Point Exceptions have been reported - 419,059,455 cycles # 3.480 GHz - 1,392,561,195 instructions # 3.32 insn per cycle - 0.120716568 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3865) (avx2: 0) (512y: 0) (512z: 0) + 475,740,171 cycles # 2.851 GHz + 1,400,755,519 instructions # 2.94 insn per cycle + 0.167716370 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3871) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.996705e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.012856e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
9.012856e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.060413 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.586616e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.599028e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.599028e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.084604 sec INFO: No Floating Point Exceptions have been reported - 209,387,891 cycles # 3.399 GHz - 681,182,721 instructions # 3.25 insn per cycle - 0.061901614 seconds time elapsed + 242,310,895 cycles # 2.753 GHz + 687,440,781 instructions # 2.84 insn per cycle + 0.088664129 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9365) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.032467e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.040331e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.040331e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.027731 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.421509e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.427219e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.427219e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.041396 sec INFO: No Floating Point Exceptions have been reported - 95,714,730 cycles # 3.307 GHz - 247,594,769 instructions # 2.59 insn per cycle - 0.029200183 seconds time elapsed + 117,633,598 cycles # 2.630 GHz + 253,582,281 instructions # 2.16 insn per cycle + 0.045344869 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8196) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.254878e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.264628e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.264628e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.025136 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.533249e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.540083e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.540083e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.038782 sec INFO: No Floating Point Exceptions have been reported - 86,937,138 cycles # 3.307 GHz - 228,402,154 instructions # 2.63 insn per cycle - 0.026539714 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7343) (512y: 126) (512z: 0) + 106,121,372 cycles # 2.518 GHz + 233,883,831 instructions # 2.20 insn per cycle + 0.042791740 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7292) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.275125e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.296190e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.296190e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.017773 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.148151e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.152898e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152898e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.050273 sec INFO: No Floating Point Exceptions have been reported - 60,673,695 cycles # 3.196 GHz - 128,728,504 instructions # 2.12 insn per cycle - 0.019180779 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 100) (512z: 6321) + 95,562,086 cycles # 1.781 GHz + 134,760,547 instructions # 1.41 insn per cycle + 0.054201969 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1904) (512y: 122) (512z: 6323) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 26ebcd33d3..15f8e8659d 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:26:28 +DATE: 2024-05-16_15:18:21 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.830621e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.798641e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.407520e+08 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.518896 sec +INFO: No Floating Point Exceptions have been reported + 2,130,015,467 cycles # 2.824 GHz + 3,049,782,764 instructions # 1.43 insn per cycle + 0.811167083 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.232897e-01 +Avg ME (F77/GPU) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 
1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.354291e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.552617e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.552617e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.907102 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.652167e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.115593e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.115593e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.200987 sec INFO: No Floating Point Exceptions have been reported - 3,180,797,976 cycles # 3.496 GHz - 8,725,132,831 instructions # 2.74 insn per cycle - 0.910013968 seconds time elapsed + 3,451,141,340 cycles # 2.863 GHz + 8,714,346,508 instructions # 2.53 insn per cycle + 1.206502072 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 458) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.370898e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.106660e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.106660e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.572261 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.615216e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.136998e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.136998e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.764589 sec INFO: No Floating Point Exceptions have been reported - 2,009,046,062 cycles # 3.495 GHz - 5,459,978,422 instructions # 2.72 insn per cycle - 0.575162423 seconds time elapsed + 2,197,801,743 cycles # 2.856 GHz + 5,465,338,789 instructions # 2.49 insn per cycle + 0.770190206 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1298) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.422177e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.188609e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.188609e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.437673 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.276018e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.408168e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.408168e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.576218 sec INFO: No Floating Point Exceptions have been reported - 1,498,454,827 cycles # 3.403 GHz - 3,159,780,709 instructions # 2.11 insn per cycle - 0.440583091 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1457) (512y: 0) (512z: 0) + 1,593,709,911 cycles # 2.743 GHz + 3,182,241,147 instructions # 2.00 insn per cycle + 0.581747530 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1459) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.570084e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.469999e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.469999e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.422625 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.349428e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.560869e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560869e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.561533 sec INFO: No Floating Point Exceptions have been reported - 1,446,725,428 cycles # 3.402 GHz - 3,027,985,230 instructions # 2.09 insn per cycle - 0.425578887 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1272) (512y: 95) (512z: 0) + 1,552,006,209 cycles # 2.741 GHz + 3,083,871,547 instructions # 1.99 insn per cycle + 0.567100846 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1274) (512y: 95) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.357154e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.401947e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.401947e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.368169 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.103380e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.012957e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.012957e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.614313 sec INFO: No Floating Point Exceptions have been reported - 1,265,935,948 cycles # 3.414 GHz - 2,320,403,035 instructions # 1.83 insn per cycle - 0.371132171 seconds time elapsed + 1,344,567,311 cycles # 2.171 GHz + 2,376,857,450 instructions # 1.77 insn per cycle + 0.619905839 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 584) (512y: 62) (512z: 953) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index 1975a5970f..6add239f16 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:26:32 +DATE: 2024-05-16_15:18:33 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.948407e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.328423e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.761410e+08 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.519601 sec +INFO: No Floating Point Exceptions have been reported + 2,123,926,879 cycles # 2.815 GHz + 2,991,717,095 instructions # 1.41 insn per cycle + 0.811782941 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.232897e-01 +Avg ME (F77/GPU) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 
1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.361427e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.560462e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.560462e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.901713 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.686449e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.122021e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.122021e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.196252 sec INFO: No Floating Point Exceptions have been reported - 3,160,289,773 cycles # 3.495 GHz - 8,639,884,903 instructions # 2.73 insn per cycle - 0.904592180 seconds time elapsed + 3,435,810,217 cycles # 2.862 GHz + 8,629,255,980 instructions # 2.51 insn per cycle + 1.201785163 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 403) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.368303e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.105355e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.105355e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.572637 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.590372e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.090308e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.090308e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.773787 sec INFO: No Floating Point Exceptions have been reported - 2,010,981,764 cycles # 3.496 GHz - 5,393,612,503 instructions # 2.68 insn per cycle - 0.575509948 seconds time elapsed + 2,172,281,754 cycles # 2.790 GHz + 5,399,686,889 instructions # 2.49 insn per cycle + 0.779398624 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1258) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.443558e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.171366e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.171366e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.435258 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.283822e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.420214e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.420214e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.573489 sec INFO: No Floating Point Exceptions have been reported - 1,490,573,485 cycles # 3.404 GHz - 3,126,999,219 instructions # 2.10 insn per cycle - 0.438133228 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1384) (512y: 0) (512z: 0) + 1,585,769,603 cycles # 2.741 GHz + 3,149,146,191 instructions # 1.99 insn per cycle + 0.579182812 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1386) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.583125e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.473819e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.473819e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.418929 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.354137e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.604902e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.604902e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.559958 sec INFO: No Floating Point Exceptions have been reported - 1,435,129,940 cycles # 3.405 GHz - 3,006,753,806 instructions # 2.10 insn per cycle - 0.421776338 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1218) (512y: 95) (512z: 0) + 1,547,131,577 cycles # 2.739 GHz + 3,062,437,995 instructions # 1.98 insn per cycle + 0.565482274 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1220) (512y: 95) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.334038e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.389007e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.389007e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.368990 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.108481e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.023241e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.023241e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.612188 sec INFO: No Floating Point Exceptions have been reported - 1,268,599,838 cycles # 3.414 GHz - 2,306,107,917 instructions # 1.82 insn per cycle - 0.371913609 seconds time elapsed + 1,354,565,413 cycles # 2.195 GHz + 2,362,076,089 instructions # 1.74 insn per cycle + 0.617754113 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 557) (512y: 62) (512z: 944) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index a4e0799fe6..35b822f8f6 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:26:36 +DATE: 2024-05-16_15:18:45 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.370205e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.202282e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.219119e+09 ) sec^-1 +MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 +TOTAL : 0.481970 sec +INFO: No Floating Point Exceptions have been reported + 1,992,725,828 cycles # 2.818 GHz + 2,868,294,521 instructions # 1.44 insn per cycle + 0.764321619 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 72 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.232893e-01 +Avg ME (F77/GPU) = 0.42328959883889183 +Relative difference = 7.059920764700599e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 
/ 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.418692e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.668051e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.668051e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 -TOTAL : 0.857057 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.685625e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.126627e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.126627e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.173857 sec INFO: No Floating Point Exceptions have been reported - 3,000,819,032 cycles # 3.492 GHz - 8,687,236,928 instructions # 2.89 insn per cycle - 0.859729395 seconds time elapsed + 3,371,653,633 cycles # 2.862 GHz + 8,663,374,999 instructions # 2.57 insn per cycle + 1.179087797 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 464) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.450523e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.473548e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.473548e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241992e-01 +- 1.228718e-04 ) GeV^0 -TOTAL : 0.420012 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.242831e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.476100e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.476100e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.559869 sec INFO: No Floating Point Exceptions have been reported - 1,475,989,433 cycles # 3.494 GHz - 3,695,071,583 instructions # 2.50 insn per cycle - 0.422713076 seconds time elapsed + 1,544,628,517 cycles # 2.742 GHz + 3,687,558,281 instructions # 2.39 insn per cycle + 0.565253973 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328960439772345 Relative difference = 1.0389396439618597e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.518335e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.025697e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.025697e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241992e-01 +- 1.228719e-04 ) GeV^0 -TOTAL : 0.343695 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.072720e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.536969e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.536969e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.431765 sec INFO: No Floating Point Exceptions have been reported - 1,181,236,861 cycles # 3.413 GHz - 2,407,227,485 instructions # 2.04 insn per cycle - 0.346416388 seconds time elapsed + 1,203,780,059 cycles # 2.758 GHz + 2,425,738,448 instructions # 2.02 insn per cycle + 0.436956710 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1835) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.663295e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.394993e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.394993e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241992e-01 +- 1.228719e-04 ) GeV^0 -TOTAL : 0.338394 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.171115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.846212e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.846212e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.420853 sec INFO: No Floating Point Exceptions have been reported - 1,163,803,486 cycles # 3.415 GHz - 2,320,293,736 instructions # 1.99 insn per cycle - 0.341067500 seconds time elapsed + 1,176,016,394 cycles # 2.764 GHz + 2,371,904,468 instructions # 2.02 insn per cycle + 0.426173333 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1716) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.367890e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.143642e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.143642e+07 ) sec^-1 -MeanMatrixElemValue = ( 
4.241992e-01 +- 1.228719e-04 ) GeV^0 -TOTAL : 0.309164 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.877260e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.908000e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.908000e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.456855 sec INFO: No Floating Point Exceptions have been reported - 1,065,579,084 cycles # 3.421 GHz - 1,993,744,116 instructions # 1.87 insn per cycle - 0.311782023 seconds time elapsed + 1,057,659,631 cycles # 2.291 GHz + 2,045,594,279 instructions # 1.93 insn per cycle + 0.462305299 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1125) (512y: 5) (512z: 1216) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328957567224279 Relative difference = 5.7473080363015266e-08 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index fa58368d8f..7aff49b16c 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:26:40 +DATE: 2024-05-16_15:18:56 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.371360e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.210950e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.256375e+09 ) sec^-1 +MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 +TOTAL : 0.480672 sec +INFO: No Floating Point Exceptions have been reported + 1,992,055,315 cycles # 2.814 GHz + 2,833,598,547 instructions # 1.42 insn per cycle + 0.764848194 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 71 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.232893e-01 +Avg ME (F77/GPU) = 0.42328960436861962 +Relative difference = 7.190557844040413e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 
/ 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.460439e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.724757e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.724757e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 -TOTAL : 0.835443 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.763702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.137508e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.137508e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.163446 sec INFO: No Floating Point Exceptions have been reported - 2,930,080,668 cycles # 3.497 GHz - 8,561,435,617 instructions # 2.92 insn per cycle - 0.838100501 seconds time elapsed + 3,338,476,373 cycles # 2.858 GHz + 8,537,550,948 instructions # 2.56 insn per cycle + 1.168736395 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 372) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.429713e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.414628e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.414628e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241992e-01 +- 1.228718e-04 ) GeV^0 -TOTAL : 0.421363 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.260122e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.497908e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497908e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.555036 sec INFO: No Floating Point Exceptions have been reported - 1,477,358,785 cycles # 3.487 GHz - 3,663,157,543 instructions # 2.48 insn per cycle - 0.423998620 seconds time elapsed + 1,536,047,057 cycles # 2.745 GHz + 3,655,155,421 instructions # 2.38 insn per cycle + 0.560267212 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1417) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328960439772345 Relative difference = 1.0389396439618597e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.539131e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.024156e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.024156e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241992e-01 +- 1.228719e-04 ) GeV^0 -TOTAL : 0.342149 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.063874e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.501699e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.501699e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.432903 sec INFO: No Floating Point Exceptions have been reported - 1,175,594,540 cycles # 3.412 GHz - 2,391,743,655 instructions # 2.03 insn per cycle - 0.344799784 seconds time elapsed + 1,210,141,290 cycles # 2.765 GHz + 2,409,755,736 instructions # 1.99 insn per cycle + 0.438252635 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1739) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.668725e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.423495e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.423495e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241992e-01 +- 1.228719e-04 ) GeV^0 -TOTAL : 0.337063 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.166764e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.861571e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.861571e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.420903 sec INFO: No Floating Point Exceptions have been reported - 1,159,169,528 cycles # 3.415 GHz - 2,308,303,558 instructions # 1.99 insn per cycle - 0.339703561 seconds time elapsed + 1,178,969,939 cycles # 2.770 GHz + 2,360,225,770 instructions # 2.00 insn per cycle + 0.426183474 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1639) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328956670826301 Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.455050e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.167889e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.167889e+07 ) sec^-1 -MeanMatrixElemValue = ( 
4.241992e-01 +- 1.228719e-04 ) GeV^0 -TOTAL : 0.304678 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.911284e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.009343e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.009343e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.450059 sec INFO: No Floating Point Exceptions have been reported - 1,051,727,913 cycles # 3.426 GHz - 1,978,782,960 instructions # 1.88 insn per cycle - 0.307278068 seconds time elapsed + 1,050,992,336 cycles # 2.312 GHz + 2,030,439,704 instructions # 1.93 insn per cycle + 0.455402836 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1038) (512y: 5) (512z: 1206) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328957567224279 Relative difference = 5.7473080363015266e-08 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index d3bac7a6e7..abe970d6c3 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:26:43 +DATE: 2024-05-16_15:19:08 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.820532e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.774843e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.362520e+08 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.522135 sec +INFO: No Floating Point Exceptions have been reported + 2,125,526,304 cycles # 2.816 GHz + 3,031,609,259 instructions # 1.43 insn per cycle + 0.813775431 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.232897e-01 +Avg ME (F77/GPU) = 0.42328961420809225 +Relative difference = 2.02678940084305e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 
/ 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.327098e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.519271e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.519271e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.921747 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.477506e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.093135e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.093135e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.221347 sec INFO: No Floating Point Exceptions have been reported - 3,232,183,518 cycles # 3.497 GHz - 8,792,573,925 instructions # 2.72 insn per cycle - 0.924658528 seconds time elapsed + 3,505,104,547 cycles # 2.859 GHz + 8,781,502,817 instructions # 2.51 insn per cycle + 1.226777715 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.394321e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.150474e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.150474e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.569327 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.650256e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.201424e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.201424e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.750812 sec INFO: No Floating Point Exceptions have been reported - 1,997,302,428 cycles # 3.492 GHz - 5,456,554,007 instructions # 2.73 insn per cycle - 0.572266310 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1316) (avx2: 0) (512y: 0) (512z: 0) + 2,158,593,065 cycles # 2.858 GHz + 5,461,970,761 instructions # 2.53 insn per cycle + 0.756427517 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1315) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.559612e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.435531e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.435531e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.426046 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.173052e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.222124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.222124e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.600946 sec INFO: No Floating Point Exceptions have been reported - 1,459,913,674 cycles # 3.405 GHz - 3,092,427,903 instructions # 2.12 insn per cycle - 0.429012673 seconds time elapsed + 1,584,857,703 cycles # 2.630 GHz + 3,130,453,718 instructions # 1.98 insn per cycle + 0.606559761 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1508) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.740143e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.828447e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.828447e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.407253 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.444228e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.788523e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.788523e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.544040 sec INFO: No Floating Point Exceptions have been reported - 1,396,236,023 cycles # 3.407 GHz - 2,910,886,914 instructions # 2.08 insn per cycle - 0.410124731 seconds time elapsed + 1,507,653,377 cycles # 2.746 GHz + 2,979,978,086 instructions # 1.98 insn per cycle + 0.549733637 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1266) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.557464e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.979745e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.979745e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.356952 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.159766e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.131056e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.131056e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.601738 sec INFO: No Floating Point Exceptions have been reported - 1,228,668,021 cycles # 3.417 GHz - 2,251,383,273 instructions # 1.83 insn per cycle - 0.359835554 seconds time elapsed + 1,324,343,740 cycles # 2.183 GHz + 2,317,585,809 instructions # 1.75 insn per cycle + 0.607328338 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 708) (512y: 64) (512z: 1000) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index f622a5fe7b..91c7a883f0 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:26:47 +DATE: 2024-05-16_15:19:20 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.922874e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.310136e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.745093e+08 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.518290 sec +INFO: No Floating Point Exceptions have been reported + 2,124,893,311 cycles # 2.820 GHz + 3,045,592,907 instructions # 1.43 insn per cycle + 0.810370808 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.232897e-01 +Avg ME (F77/GPU) = 0.42328961420809225 +Relative difference = 2.02678940084305e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 
/ 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.324875e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.511952e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.511952e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.922159 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.542081e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.100861e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100861e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.212162 sec INFO: No Floating Point Exceptions have been reported - 3,233,137,052 cycles # 3.496 GHz - 8,703,016,285 instructions # 2.69 insn per cycle - 0.925038017 seconds time elapsed + 3,479,876,909 cycles # 2.860 GHz + 8,693,142,752 instructions # 2.50 insn per cycle + 1.217788949 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 2.384239e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.131034e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.131034e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.569347 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.583309e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.076893e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.076893e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.776846 sec INFO: No Floating Point Exceptions have been reported - 1,994,158,544 cycles # 3.487 GHz - 5,378,748,355 instructions # 2.70 insn per cycle - 0.572208576 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1287) (avx2: 0) (512y: 0) (512z: 0) + 2,167,338,088 cycles # 2.773 GHz + 5,396,551,029 instructions # 2.49 insn per cycle + 0.782321373 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1286) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.417383e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.522816e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.522816e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.437843 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.326845e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.550286e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.550286e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.565802 sec INFO: No Floating Point Exceptions have been reported - 1,500,871,039 cycles # 3.408 GHz - 3,055,075,884 instructions # 2.04 insn per cycle - 0.440705424 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1408) (512y: 0) (512z: 0) + 1,565,712,129 cycles # 2.743 GHz + 3,096,211,416 instructions # 1.98 insn per cycle + 0.571442008 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1403) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 3.730924e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.816090e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.816090e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.407927 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.453432e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.812851e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.812851e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.541762 sec INFO: No Floating Point Exceptions have been reported - 1,397,568,169 cycles # 3.408 GHz - 2,885,067,782 instructions # 2.06 insn per cycle - 0.410801458 seconds time elapsed + 1,501,240,710 cycles # 2.746 GHz + 2,962,583,104 instructions # 1.97 insn per cycle + 0.547343450 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1207) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.578847e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.063083e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.063083e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.355641 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.179755e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.168512e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.168512e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.595795 sec INFO: No Floating Point Exceptions have been reported - 1,223,923,877 cycles # 3.417 GHz - 2,232,489,048 instructions # 1.82 insn per cycle - 0.358486033 seconds time elapsed + 1,328,066,698 cycles # 2.210 GHz + 2,301,968,914 instructions # 1.73 insn per cycle + 0.601517736 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 669) (512y: 64) (512z: 987) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962604218012 Relative difference = 1.747215201983364e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 6ee29efed3..685cbca5b9 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:25:21 +DATE: 2024-05-16_15:16:05 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.742150e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168430e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277843e+08 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.532609 sec +INFO: No Floating Point Exceptions have been reported + 2,187,320,510 cycles # 2.847 GHz + 3,138,661,758 instructions # 1.43 insn per cycle + 0.825533767 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 3.234080e+00 +Avg ME (F77/GPU) = 3.2340795799595186 +Relative difference = 1.2987943449389332e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) 
= ( 2.787390e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.861874e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.861874e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 3.890119 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.052254e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.112326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.112326e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.217611 sec INFO: No Floating Point Exceptions have been reported - 13,618,207,328 cycles # 3.499 GHz - 38,387,959,458 instructions # 2.82 insn per cycle - 3.893052041 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 679) (avx2: 0) (512y: 0) (512z: 0) + 15,171,088,318 cycles # 2.905 GHz + 38,379,828,637 instructions # 2.53 insn per cycle + 5.223033411 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593964 Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.831062e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.077740e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.077740e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) 
GeV^0 -TOTAL : 2.297638 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.483453e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.675957e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.675957e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.119586 sec INFO: No Floating Point Exceptions have been reported - 8,042,217,440 cycles # 3.496 GHz - 24,577,604,144 instructions # 3.06 insn per cycle - 2.300592580 seconds time elapsed + 9,050,575,942 cycles # 2.897 GHz + 24,585,418,505 instructions # 2.72 insn per cycle + 3.125051862 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593955 Relative difference = 1.2987947253027805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.841839e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.487845e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.487845e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 1.464950 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.531605e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.007383e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.007383e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.004395 sec INFO: No Floating Point Exceptions have been reported - 4,886,201,453 cycles # 3.329 GHz - 11,233,439,311 instructions # 2.30 insn per cycle - 1.467827376 seconds time elapsed + 5,470,487,475 cycles # 2.723 GHz + 11,258,117,341 instructions # 2.06 insn per cycle + 2.009874159 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2379) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.176872e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.875940e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.875940e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) 
GeV^0 -TOTAL : 1.406146 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.034312e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.611178e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.611178e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 1.846817 sec INFO: No Floating Point Exceptions have been reported - 4,692,310,341 cycles # 3.331 GHz - 10,637,543,631 instructions # 2.27 insn per cycle - 1.409098877 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2128) (512y: 124) (512z: 0) + 4,937,000,755 cycles # 2.666 GHz + 10,562,656,233 instructions # 2.14 insn per cycle + 1.852346867 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.318068e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.029145e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.029145e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) 
GeV^0 -TOTAL : 1.384314 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.686069e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.892849e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.892849e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.955560 sec INFO: No Floating Point Exceptions have been reported - 4,618,770,611 cycles # 3.330 GHz - 7,677,034,251 instructions # 1.66 insn per cycle - 1.387244129 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1438) (512y: 100) (512z: 1543) + 5,363,967,162 cycles # 1.812 GHz + 7,798,816,647 instructions # 1.45 insn per cycle + 2.961128813 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1545) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 10b04f1f61..e33bd01ef0 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:25:32 +DATE: 2024-05-16_15:16:29 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.734270e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.167895e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277771e+08 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.531030 sec +INFO: No Floating Point Exceptions have been reported + 2,147,766,041 cycles # 2.808 GHz + 3,081,960,346 instructions # 1.43 insn per cycle + 0.823573588 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 3.234080e+00 +Avg ME (F77/GPU) = 3.2340795799595186 +Relative difference = 1.2987943449389332e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) 
= ( 2.740388e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.812570e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.812570e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 3.954536 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.072347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.133952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.133952e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.167480 sec INFO: No Floating Point Exceptions have been reported - 13,826,590,739 cycles # 3.497 GHz - 40,193,490,698 instructions # 2.91 insn per cycle - 3.957648216 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) + 15,011,121,904 cycles # 2.902 GHz + 40,101,107,795 instructions # 2.67 insn per cycle + 5.172969591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593964 Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.106154e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.381557e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.381557e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) 
GeV^0 -TOTAL : 2.180640 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.643871e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.853935e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.853935e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.986462 sec INFO: No Floating Point Exceptions have been reported - 7,631,771,620 cycles # 3.496 GHz - 23,662,957,585 instructions # 3.10 insn per cycle - 2.183596089 seconds time elapsed + 8,687,902,361 cycles # 2.905 GHz + 23,671,582,038 instructions # 2.72 insn per cycle + 2.991891761 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2071) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593955 Relative difference = 1.2987947253027805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.885731e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.381574e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.381574e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 1.649994 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.688647e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.031946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.031946e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.347118 sec INFO: No Floating Point Exceptions have been reported - 5,495,835,199 cycles # 3.326 GHz - 13,036,765,053 instructions # 2.37 insn per cycle - 1.652905979 seconds time elapsed + 6,408,205,490 cycles # 2.726 GHz + 13,061,009,362 instructions # 2.04 insn per cycle + 2.352705794 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2545) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.276846e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.824926e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.824926e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) 
GeV^0 -TOTAL : 1.564074 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.217515e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.639971e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.639971e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.116902 sec INFO: No Floating Point Exceptions have been reported - 5,212,061,608 cycles # 3.327 GHz - 12,262,487,055 instructions # 2.35 insn per cycle - 1.566961165 seconds time elapsed + 5,786,103,959 cycles # 2.728 GHz + 12,322,398,791 instructions # 2.13 insn per cycle + 2.122365893 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2092) (512y: 294) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.945464e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.602857e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.602857e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) 
GeV^0 -TOTAL : 1.442889 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.391355e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.565589e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.565589e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.201193 sec INFO: No Floating Point Exceptions have been reported - 4,806,634,394 cycles # 3.325 GHz - 9,536,077,650 instructions # 1.98 insn per cycle - 1.445783339 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1520) (512y: 199) (512z: 1969) + 5,819,258,849 cycles # 1.816 GHz + 9,603,315,511 instructions # 1.65 insn per cycle + 3.206783116 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1970) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799594546 Relative difference = 1.2987945426732077e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index afa4d341c3..fa2404eda0 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:25:45 +DATE: 2024-05-16_15:16:53 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.806467e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.679043e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.988694e+08 ) sec^-1 +MeanMatrixElemValue = ( 3.294909e+00 +- 3.228140e-03 ) GeV^0 +TOTAL : 0.484472 sec +INFO: No Floating Point Exceptions have been reported + 2,024,107,607 cycles # 2.847 GHz + 2,925,717,340 instructions # 1.45 insn per cycle + 0.767822860 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 3.234085e+00 +Avg ME (F77/GPU) = 3.2341253389604390 +Relative difference = 1.2473067479392238e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = 
( 3.067256e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.169032e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.169032e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 -TOTAL : 3.535020 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.190102e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.263149e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.263149e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 +TOTAL : 4.875075 sec INFO: No Floating Point Exceptions have been reported - 12,369,254,741 cycles # 3.496 GHz - 38,267,142,446 instructions # 3.09 insn per cycle - 3.537767898 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 598) (avx2: 0) (512y: 0) (512z: 0) + 14,157,231,167 cycles # 2.902 GHz + 38,349,372,496 instructions # 2.71 insn per cycle + 4.880360280 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 596) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 Avg ME (F77/C++) = 3.2340941932052374 Relative difference = 5.974014286114415e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.570040e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.068066e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.068066e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 
-TOTAL : 1.711228 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.893708e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.295163e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.295163e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 +TOTAL : 2.231375 sec INFO: No Floating Point Exceptions have been reported - 5,990,330,565 cycles # 3.497 GHz - 15,827,583,020 instructions # 2.64 insn per cycle - 1.713959969 seconds time elapsed + 6,474,839,888 cycles # 2.896 GHz + 15,821,273,128 instructions # 2.44 insn per cycle + 2.236825857 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2693) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234093e+00 Avg ME (F77/C++) = 3.2340934062376618 Relative difference = 1.2561100182708985e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.250375e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.419899e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.419899e+06 ) sec^-1 -MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) GeV^0 
-TOTAL : 0.948179 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.952001e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.027533e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.027533e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.258720 sec INFO: No Floating Point Exceptions have been reported - 3,175,156,549 cycles # 3.340 GHz - 7,579,990,052 instructions # 2.39 insn per cycle - 0.950902565 seconds time elapsed + 3,454,982,692 cycles # 2.735 GHz + 7,599,041,128 instructions # 2.20 insn per cycle + 1.263980564 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3054) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919882990420 Relative difference = 3.6180040581126224e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.284548e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.465053e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.465053e+06 ) sec^-1 -MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) GeV^0 
-TOTAL : 0.927820 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.592851e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.112843e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.112843e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.180051 sec INFO: No Floating Point Exceptions have been reported - 3,106,651,076 cycles # 3.339 GHz - 7,229,113,930 instructions # 2.33 insn per cycle - 0.930548590 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2909) (512y: 3) (512z: 0) + 3,244,154,820 cycles # 2.739 GHz + 7,208,080,032 instructions # 2.22 insn per cycle + 1.185371954 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2854) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919882990420 Relative difference = 3.6180040581126224e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.443944e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.682164e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.682164e+06 ) sec^-1 -MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) 
GeV^0 -TOTAL : 0.837741 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.861599e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.601056e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.601056e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.616099 sec INFO: No Floating Point Exceptions have been reported - 2,808,864,011 cycles # 3.343 GHz - 5,765,952,499 instructions # 2.05 insn per cycle - 0.840456519 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2378) (512y: 0) (512z: 1889) + 3,061,871,050 cycles # 1.890 GHz + 5,840,738,200 instructions # 1.91 insn per cycle + 1.621459577 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2375) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340921289287508 Relative difference = 3.986551736519174e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 04e4af6a7b..17580b0829 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:25:54 +DATE: 2024-05-16_15:17:12 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.907160e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.728602e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.048441e+08 ) sec^-1 +MeanMatrixElemValue = ( 3.294909e+00 +- 3.228140e-03 ) GeV^0 +TOTAL : 0.485743 sec +INFO: No Floating Point Exceptions have been reported + 2,023,423,533 cycles # 2.849 GHz + 2,905,255,031 instructions # 1.44 insn per cycle + 0.768600730 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 3.234085e+00 +Avg ME (F77/GPU) = 3.2341253389604390 +Relative difference = 1.2473067479392238e-05 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = 
( 3.115923e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.224041e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.224041e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 -TOTAL : 3.480773 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.168782e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.238544e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.238544e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 +TOTAL : 4.921731 sec INFO: No Floating Point Exceptions have been reported - 12,185,361,621 cycles # 3.498 GHz - 39,804,148,929 instructions # 3.27 insn per cycle - 3.483488402 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 577) (avx2: 0) (512y: 0) (512z: 0) + 14,314,886,956 cycles # 2.906 GHz + 39,834,092,366 instructions # 2.78 insn per cycle + 4.927032591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 Avg ME (F77/C++) = 3.2340941675938666 Relative difference = 5.182096339328524e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.103568e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.876705e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.876705e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 
-TOTAL : 1.407812 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.713515e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.269520e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.269520e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 +TOTAL : 1.922771 sec INFO: No Floating Point Exceptions have been reported - 4,928,928,439 cycles # 3.495 GHz - 15,291,551,221 instructions # 3.10 insn per cycle - 1.410572590 seconds time elapsed + 5,581,497,918 cycles # 2.896 GHz + 15,286,085,618 instructions # 2.74 insn per cycle + 1.928038449 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234093e+00 Avg ME (F77/C++) = 3.2340934062376618 Relative difference = 1.2561100182708985e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.083110e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.950197e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.950197e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) GeV^0 
-TOTAL : 1.264015 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.348339e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.987488e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.987488e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.738529 sec INFO: No Floating Point Exceptions have been reported - 4,217,167,690 cycles # 3.330 GHz - 9,714,976,139 instructions # 2.30 insn per cycle - 1.266795977 seconds time elapsed + 4,748,584,350 cycles # 2.724 GHz + 9,734,762,909 instructions # 2.05 insn per cycle + 1.743720825 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3707) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919817797840 Relative difference = 5.633796441974414e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 9.192270e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.008345e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.008345e+06 ) sec^-1 -MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) GeV^0 
-TOTAL : 1.252318 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.524514e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.201131e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.201131e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.693263 sec INFO: No Floating Point Exceptions have been reported - 4,177,402,910 cycles # 3.329 GHz - 9,272,184,711 instructions # 2.22 insn per cycle - 1.255030337 seconds time elapsed + 4,630,030,488 cycles # 2.727 GHz + 9,326,323,775 instructions # 2.01 insn per cycle + 1.698452247 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3495) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340919817797840 Relative difference = 5.633796441974414e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 1.176003e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.330692e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.330692e+06 ) sec^-1 -MeanMatrixElemValue = ( 3.287795e+00 +- 3.219542e-03 ) GeV^0 
-TOTAL : 1.003208 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.566237e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.043529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.043529e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.970968 sec INFO: No Floating Point Exceptions have been reported - 3,352,170,777 cycles # 3.333 GHz - 6,969,311,498 instructions # 2.08 insn per cycle - 1.005942374 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2609) (512y: 0) (512z: 2220) + 3,659,262,236 cycles # 1.853 GHz + 7,035,706,161 instructions # 1.92 insn per cycle + 1.976219857 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2608) (512y: 12) (512z: 2220) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234092e+00 Avg ME (F77/C++) = 3.2340921270661056 Relative difference = 3.928957668408837e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 690851a1c4..b504154b8b 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:26:04 +DATE: 2024-05-16_15:17:33 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.734753e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166290e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275672e+08 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.527580 sec +INFO: No Floating Point Exceptions have been reported + 2,184,025,819 cycles # 2.852 GHz + 3,120,664,968 instructions # 1.43 insn per cycle + 0.822365132 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 3.234080e+00 +Avg ME (F77/GPU) = 3.2340795839181666 +Relative difference = 1.2865539301192385e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = 
( 2.763034e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.836655e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.836655e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 3.924694 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.032702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.091464e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.091464e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.267767 sec INFO: No Floating Point Exceptions have been reported - 13,738,931,281 cycles # 3.498 GHz - 38,522,259,648 instructions # 2.80 insn per cycle - 3.927637983 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) + 15,275,610,730 cycles # 2.898 GHz + 38,585,204,587 instructions # 2.53 insn per cycle + 5.273127531 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 677) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 4.902677e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.156176e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.156176e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 2.266078 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.478780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.672331e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.672331e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.124457 sec INFO: No Floating Point Exceptions have been reported - 7,934,170,917 cycles # 3.497 GHz - 24,223,400,789 instructions # 3.05 insn per cycle - 2.268949403 seconds time elapsed + 8,951,368,692 cycles # 2.862 GHz + 24,230,346,765 instructions # 2.71 insn per cycle + 3.129932357 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.000583e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.673091e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.673091e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 1.438286 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.646169e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.144963e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.144963e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 1.966588 sec INFO: No Floating Point Exceptions have been reported - 4,798,481,088 cycles # 3.330 GHz - 11,208,114,331 instructions # 2.34 insn per cycle - 1.441176727 seconds time elapsed + 5,394,193,630 cycles # 2.737 GHz + 11,282,079,100 instructions # 2.09 insn per cycle + 1.972075346 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2483) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.267759e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.985920e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.985920e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 1.392423 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.312770e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.933844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.933844e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 1.769300 sec INFO: No Floating Point Exceptions have been reported - 4,644,990,529 cycles # 3.330 GHz - 10,555,084,125 instructions # 2.27 insn per cycle - 1.395314648 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2221) (512y: 128) (512z: 0) + 4,855,634,573 cycles # 2.737 GHz + 10,529,908,188 instructions # 2.17 insn per cycle + 1.774939787 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2170) (512y: 148) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.584630e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.339766e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.339766e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 1.345070 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.779051e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.993953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.993953e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.883837 sec INFO: No Floating Point Exceptions have been reported - 4,488,723,511 cycles # 3.331 GHz - 7,446,352,831 instructions # 1.66 insn per cycle - 1.347938337 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1626) (512y: 104) (512z: 1609) + 5,232,692,174 cycles # 1.812 GHz + 7,609,089,901 instructions # 1.45 insn per cycle + 2.889504238 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1611) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index 5bafeaaf14..62b069d661 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,183 +1,214 @@ -Building in /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:26:16 +DATE: 2024-05-16_15:17:56 -On itgold91.cern.ch [CPU: Intel(R) Xeon(R) Gold 6326 CPU] [GPU: none]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.743856e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168884e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279553e+08 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.531580 sec +INFO: No Floating Point Exceptions have been reported + 2,155,818,187 cycles # 2.818 GHz + 3,085,690,683 instructions # 1.43 insn per cycle + 0.823819066 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 3.234080e+00 +Avg ME (F77/GPU) = 3.2340795839181666 +Relative difference = 1.2865539301192385e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = 
( 2.685093e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.754855e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.754855e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 4.033579 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.002464e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.060011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.060011e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.344548 sec INFO: No Floating Point Exceptions have been reported - 14,113,224,310 cycles # 3.497 GHz - 40,344,618,972 instructions # 2.86 insn per cycle - 4.036535060 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 682) (avx2: 0) (512y: 0) (512z: 0) + 15,331,700,326 cycles # 2.866 GHz + 40,369,778,421 instructions # 2.63 insn per cycle + 5.350011304 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 5.092059e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.369527e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.369527e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 2.185707 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.555017e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.755921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.755921e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.059082 sec INFO: No Floating Point Exceptions have been reported - 7,645,095,536 cycles # 3.493 GHz - 23,246,336,631 instructions # 3.04 insn per cycle - 2.189160403 seconds time elapsed + 8,522,277,742 cycles # 2.782 GHz + 23,253,428,254 instructions # 2.73 insn per cycle + 3.064709896 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2090) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 6.835334e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.320136e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.320136e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 1.661105 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.699594e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.044812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.044812e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.340476 sec INFO: No Floating Point Exceptions have been reported - 5,533,662,877 cycles # 3.326 GHz - 12,888,058,955 instructions # 2.33 insn per cycle - 1.664031290 seconds time elapsed + 6,239,696,903 cycles # 2.661 GHz + 12,963,096,678 instructions # 2.08 insn per cycle + 2.346005075 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2668) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 7.211823e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.749624e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.749624e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 1.577159 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.032659e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.430530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.430530e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.191599 sec INFO: No Floating Point Exceptions have been reported - 5,254,421,927 cycles # 3.327 GHz - 12,086,386,777 instructions # 2.30 insn per cycle - 1.580021491 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) + 5,901,015,524 cycles # 2.687 GHz + 12,238,387,260 instructions # 2.07 insn per cycle + 2.197121947 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2208) (512y: 296) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.4.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 32 -EvtsPerSec[Rmb+ME] (23) = ( 8.190386e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.891944e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.891944e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 1.403785 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.554826e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.745267e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.745267e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.058656 sec INFO: No Floating Point Exceptions have been reported - 4,676,949,985 cycles # 3.325 GHz - 8,673,917,546 instructions # 1.85 insn per cycle - 1.406719317 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1501) (512y: 173) (512z: 1907) + 5,596,491,041 cycles # 1.827 GHz + 8,743,545,379 instructions # 1.56 insn per cycle + 3.064278596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1908) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2024/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796772295590 Relative difference = 9.980286234148268e-08 From 687c1e59ccf4071a7e5cdd3d7548adfd27c8ae33 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 16 May 2024 20:35:51 +0300 Subject: [PATCH 43/48] [jtmk2] bug fix for -short|-ggttggg and -nobsm|-bsmonly options in tmad/allTees.sh --- epochX/cudacpp/tmad/allTees.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/tmad/allTees.sh b/epochX/cudacpp/tmad/allTees.sh index 6e0d1931a6..a9479018f9 100755 --- a/epochX/cudacpp/tmad/allTees.sh +++ b/epochX/cudacpp/tmad/allTees.sh @@ -17,10 +17,10 @@ add10x="+10x" while [ "$1" != "" ]; do if [ "$1" == "-short" ]; then - short=1 # all but ggttggg + short=1 # all (possibly including bsm) but ggttggg shift elif [ "$1" == "-ggttggg" ]; then - short=-1 # only ggttggg + short=-1 # only ggttggg (implies no bsm!) shift elif [ "$1" == "-makeclean" ]; then makeclean=$1 @@ -55,7 +55,9 @@ status=$? 
ended1="(SM tests)\nENDED(1) AT $(date) [Status=$status]" if [ "${bsm}" != "-nobsm" ]; then - ${scrdir}/teeMadX.sh -heftggbb -susyggtt -susyggt1t1 -smeftggtttt $flts $makeclean $rmrdat $add10x + if [ "$short" != "-1" ]; then + ${scrdir}/teeMadX.sh -heftggbb -susyggtt -susyggt1t1 -smeftggtttt $flts $makeclean $rmrdat $add10x + fi fi status=$? ended2="(BSM tests)\nENDED(1) AT $(date) [Status=$status]" From 6c817819edb1788b97afd4b72ee505d5635716c9 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 17 May 2024 08:40:50 +0200 Subject: [PATCH 44/48] [jtmk2] rerun 96 tput tests on LUMI worker node - all as expected STARTED AT Thu 16 May 2024 03:38:14 PM EEST ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean ENDED(1) AT Thu 16 May 2024 04:08:54 PM EEST [Status=2] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) AT Thu 16 May 2024 04:19:28 PM EEST [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Thu 16 May 2024 04:31:08 PM EEST [Status=2] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Thu 16 May 2024 04:33:20 PM EEST [Status=0] SKIP './tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common ' ENDED(5) AT Thu 16 May 2024 04:33:20 PM EEST [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common ENDED(6) AT Thu 16 May 2024 04:35:30 PM EEST [Status=0] ./tput/teeThroughputX.sh -mix -hrd -makej -susyggtt -susyggt1t1 -smeftggtttt -heftggbb -makeclean ENDED(7) AT Thu 16 May 2024 04:49:20 PM EEST [Status=0] --- .../log_eemumu_mad_d_inl0_hrd0.txt | 245 ++++++--------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 253 ++++++--------- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 231 ++++++-------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 248 ++++++--------- .../log_eemumu_mad_d_inl0_hrd1.txt | 241 ++++++-------- .../log_eemumu_mad_d_inl1_hrd0.txt | 245 ++++++--------- 
.../log_eemumu_mad_d_inl1_hrd1.txt | 245 ++++++--------- .../log_eemumu_mad_f_inl0_hrd0.txt | 255 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 261 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 241 ++++++-------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 256 +++++++-------- .../log_eemumu_mad_f_inl0_hrd1.txt | 255 +++++++-------- .../log_eemumu_mad_f_inl1_hrd0.txt | 255 +++++++-------- .../log_eemumu_mad_f_inl1_hrd1.txt | 255 +++++++-------- .../log_eemumu_mad_m_inl0_hrd0.txt | 241 ++++++-------- .../log_eemumu_mad_m_inl0_hrd1.txt | 241 ++++++-------- .../log_ggtt_mad_d_inl0_hrd0.txt | 245 ++++++--------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 253 ++++++--------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 231 ++++++-------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 248 ++++++--------- .../log_ggtt_mad_d_inl0_hrd1.txt | 241 ++++++-------- .../log_ggtt_mad_d_inl1_hrd0.txt | 245 ++++++--------- .../log_ggtt_mad_d_inl1_hrd1.txt | 241 ++++++-------- .../log_ggtt_mad_f_inl0_hrd0.txt | 259 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 267 +++++++--------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 251 ++++++--------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 262 +++++++--------- .../log_ggtt_mad_f_inl0_hrd1.txt | 259 +++++++-------- .../log_ggtt_mad_f_inl1_hrd0.txt | 259 +++++++-------- .../log_ggtt_mad_f_inl1_hrd1.txt | 259 +++++++-------- .../log_ggtt_mad_m_inl0_hrd0.txt | 245 ++++++--------- .../log_ggtt_mad_m_inl0_hrd1.txt | 245 ++++++--------- .../log_ggttg_mad_d_inl0_hrd0.txt | 270 +++++++--------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 280 +++++++---------- .../log_ggttg_mad_d_inl0_hrd1.txt | 270 +++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 284 ++++++++--------- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 294 ++++++++--------- .../log_ggttg_mad_f_inl0_hrd1.txt | 284 ++++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 266 +++++++--------- .../log_ggttg_mad_m_inl0_hrd1.txt | 266 +++++++--------- 
.../log_ggttgg_mad_d_inl0_hrd0.txt | 270 +++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 280 +++++++---------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 254 +++++++-------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 275 +++++++--------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 270 +++++++--------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 278 +++++++--------- .../log_ggttgg_mad_d_inl1_hrd1.txt | 274 +++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 286 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 296 ++++++++---------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 280 +++++++---------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 291 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 284 ++++++++--------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 284 ++++++++--------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 284 ++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 266 +++++++--------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 266 +++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 266 +++++++--------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 276 +++++++--------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 266 +++++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 282 +++++++---------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 292 ++++++++--------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 282 +++++++---------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 266 +++++++--------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 266 +++++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 241 +++----------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 254 +++------------ .../log_gqttq_mad_d_inl0_hrd1.txt | 241 +++----------- .../log_gqttq_mad_f_inl0_hrd0.txt | 241 +++----------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 254 +++------------ .../log_gqttq_mad_f_inl0_hrd1.txt | 241 +++----------- .../log_gqttq_mad_m_inl0_hrd0.txt | 241 +++----------- .../log_gqttq_mad_m_inl0_hrd1.txt | 241 +++----------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 241 ++++++-------- 
.../log_heftggbb_mad_d_inl0_hrd1.txt | 241 ++++++-------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 261 +++++++-------- .../log_heftggbb_mad_f_inl0_hrd1.txt | 261 +++++++-------- .../log_heftggbb_mad_m_inl0_hrd0.txt | 241 ++++++-------- .../log_heftggbb_mad_m_inl0_hrd1.txt | 241 ++++++-------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 270 +++++++--------- .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 270 +++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 286 ++++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 286 ++++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 266 +++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 266 +++++++--------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 241 ++++++-------- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 241 ++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 251 ++++++--------- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 251 ++++++--------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 245 ++++++--------- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 245 ++++++--------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 249 ++++++--------- .../log_susyggtt_mad_d_inl0_hrd1.txt | 253 +++++++-------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 259 +++++++-------- .../log_susyggtt_mad_f_inl0_hrd1.txt | 259 +++++++-------- .../log_susyggtt_mad_m_inl0_hrd0.txt | 245 ++++++--------- .../log_susyggtt_mad_m_inl0_hrd1.txt | 245 ++++++--------- 96 files changed, 9910 insertions(+), 14984 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 8f9275e4c9..62f5a1747b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:32:11 +DATE: 2024-05-16_15:54:35 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.832141e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.963737e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.194654e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.663591 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.192740e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.108907e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.336897e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.552555 sec INFO: No Floating Point Exceptions have been reported - 2,552,888,166 cycles # 2.852 GHz - 3,988,214,096 instructions # 1.56 insn per cycle - 0.955666761 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,305,552,637 cycles:u # 2.392 GHz (74.72%) + 2,182,824 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.11%) + 5,346,063 stalled-cycles-backend:u # 0.41% backend cycles idle (76.06%) + 2,102,532,654 instructions:u # 1.61 insn per cycle + # 0.00 stalled cycles per insn (75.80%) + 0.627941411 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.053499e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.235255e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.235255e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.389071 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.209249e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.380707e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.380707e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.951508 sec INFO: No Floating Point Exceptions have been reported - 18,286,976,651 cycles # 2.860 GHz - 43,966,894,713 instructions # 2.40 insn per cycle - 6.394304827 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 19,608,150,481 cycles:u # 3.291 GHz (74.93%) + 51,216,372 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.98%) + 54,836,990 stalled-cycles-backend:u # 0.28% backend cycles idle (75.03%) + 46,950,513,606 instructions:u # 2.39 insn per cycle + # 0.00 stalled cycles per insn (75.03%) + 5.966260573 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.575090e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.062276e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.062276e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 4.417510 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.889128e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.377852e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.377852e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.063095 sec INFO: No Floating Point Exceptions have been reported - 12,744,894,916 cycles # 2.883 GHz - 31,001,019,523 instructions # 2.43 insn per cycle - 4.422588286 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) + 13,176,650,260 cycles:u # 3.235 GHz (74.81%) + 52,164,290 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.97%) + 678,636,146 stalled-cycles-backend:u # 5.15% backend cycles idle (75.06%) + 31,192,438,300 instructions:u # 2.37 insn per cycle + # 0.02 stalled cycles per insn (75.06%) + 4.077189517 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.946862e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.717949e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.717949e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.650052 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.592796e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.443978e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.443978e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.173904 sec INFO: No Floating Point Exceptions have been reported - 10,049,299,034 cycles # 2.750 GHz - 19,366,983,583 instructions # 1.93 insn per cycle - 3.655131055 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) + 10,145,365,093 cycles:u # 3.186 GHz (74.89%) + 51,976,186 stalled-cycles-frontend:u # 0.51% frontend cycles idle (74.89%) + 1,275,348,320 stalled-cycles-backend:u # 12.57% backend cycles idle (74.97%) + 19,402,420,759 instructions:u # 1.91 insn per cycle + # 0.07 stalled cycles per insn (75.10%) + 3.187813666 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.022084e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.853820e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.853820e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.526182 sec -INFO: No Floating Point Exceptions have been reported - 9,727,313,452 cycles # 2.755 GHz - 18,976,774,064 instructions # 1.95 insn per cycle - 3.531366474 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.695166e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.233065e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.233065e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.130967 sec -INFO: No Floating Point Exceptions have been reported - 
8,580,931,991 cycles # 2.075 GHz - 15,727,945,386 instructions # 1.83 insn per cycle - 4.136130895 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 556a164c58..a1f621c196 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,231 +1,182 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:59:51 +DATE: 2024-05-16_16:26:26 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.482485e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.592798e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.592798e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.321030 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.452763e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.436216e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.436216e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.562380 sec INFO: No Floating Point Exceptions have been reported - 7,329,722,152 cycles # 2.840 GHz - 13,178,162,400 instructions # 1.80 insn per cycle - 2.637544426 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 18,356,696,438 cycles:u # 3.283 GHz (74.97%) + 218,525,993 stalled-cycles-frontend:u # 1.19% frontend cycles idle (74.97%) + 6,906,420,713 stalled-cycles-backend:u # 37.62% backend cycles idle (74.89%) + 17,048,626,162 instructions:u # 0.93 insn per cycle + # 0.41 stalled cycles per insn (75.01%) + 5.622622682 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.015910e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.185158e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.185158e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.819356 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.218933e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.388755e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.388755e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.996185 sec INFO: No Floating Point Exceptions have been reported - 19,469,024,508 cycles # 2.853 GHz - 44,194,459,972 instructions # 2.27 insn per cycle - 6.826135735 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 20,008,740,019 cycles:u # 3.326 GHz (75.00%) + 51,656,692 
stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.94%) + 118,408,767 stalled-cycles-backend:u # 0.59% backend cycles idle (74.94%) + 47,242,044,176 instructions:u # 2.36 insn per cycle + # 0.00 stalled cycles per insn (75.00%) + 6.019345525 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.484528e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.911785e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.911785e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.870031 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.860775e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.335297e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.335297e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.250665 sec INFO: No Floating Point Exceptions have been reported - 13,928,289,994 cycles # 2.857 GHz - 31,840,505,402 instructions # 2.29 insn per cycle - 4.876819018 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) + 13,907,967,553 cycles:u # 3.258 GHz (74.91%) + 53,859,439 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.89%) + 799,802,404 stalled-cycles-backend:u # 5.75% backend cycles idle (74.97%) + 31,962,126,402 instructions:u # 2.30 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 4.272203950 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.770728e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.386706e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.386706e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.194561 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.476048e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.231168e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.231168e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.423761 sec INFO: No Floating Point Exceptions have been reported - 11,361,909,372 cycles # 2.705 GHz - 20,728,193,515 instructions # 1.82 insn per cycle - 4.201564491 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) + 10,850,734,830 cycles:u # 3.152 GHz (74.91%) + 49,643,502 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.91%) + 1,327,332,054 stalled-cycles-backend:u # 12.23% backend cycles idle (75.03%) + 20,560,922,677 instructions:u # 1.89 insn per cycle + # 0.06 stalled cycles per insn (75.14%) + 3.446530381 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.837895e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.511052e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.511052e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.060412 sec -INFO: No Floating Point Exceptions have been reported - 10,967,372,142 cycles # 2.697 GHz - 20,348,024,135 instructions # 1.86 insn per cycle - 4.067336299 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.574771e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.036722e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.036722e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.642745 sec -INFO: No Floating Point Exceptions have been reported - 9,876,735,144 cycles # 2.125 GHz - 16,873,564,045 instructions # 1.71 insn per cycle - 4.649693422 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 752636bf13..62a102726d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:10:56 +DATE: 2024-05-16_16:33:35 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.514734e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.592124e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.118434e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.296438e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.102193e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.329275e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
1.385277 sec +TOTAL : 4.838234 sec INFO: No Floating Point Exceptions have been reported - 4,585,820,337 cycles # 2.836 GHz - 7,177,605,134 instructions # 1.57 insn per cycle - 1.675534023 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 15,394,934,549 cycles:u # 3.175 GHz (74.78%) + 154,237,641 stalled-cycles-frontend:u # 1.00% frontend cycles idle (74.96%) + 6,875,767,005 stalled-cycles-backend:u # 44.66% backend cycles idle (75.09%) + 11,503,925,750 instructions:u # 0.75 insn per cycle + # 0.60 stalled cycles per insn (75.09%) + 4.891696810 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.053610e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.236408e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.236408e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.205745e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.375909e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.375909e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.764205 sec +TOTAL : 5.967860 sec INFO: No Floating Point Exceptions have been reported - 19,353,708,257 cycles # 2.859 GHz - 44,070,957,602 instructions # 2.28 insn per cycle - 6.769682162 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 19,597,182,675 cycles:u # 3.278 GHz (74.98%) + 51,611,081 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.98%) + 52,432,484 stalled-cycles-backend:u # 0.27% backend cycles idle (74.98%) + 47,079,710,649 instructions:u # 2.40 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 5.979908816 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.538542e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.015186e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.015186e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.876605e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.377107e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.377107e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.882143 sec +TOTAL : 4.093970 sec INFO: No Floating Point Exceptions have been reported - 13,858,733,837 cycles # 2.836 GHz - 31,001,638,282 instructions # 2.24 insn per cycle - 4.887574523 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) + 13,286,523,847 cycles:u # 3.237 GHz (74.92%) + 53,103,722 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.02%) + 696,597,262 stalled-cycles-backend:u # 5.24% backend cycles idle (75.05%) + 31,069,991,906 instructions:u # 2.34 insn per cycle + # 0.02 stalled cycles per insn (75.05%) + 4.105527074 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.916460e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.668694e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.668694e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.592189e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.449037e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.449037e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.073472 sec +TOTAL : 3.167352 sec INFO: No Floating Point Exceptions have been reported - 11,156,313,503 cycles # 2.736 GHz - 19,267,334,271 instructions # 1.73 insn per cycle - 4.078862770 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) + 10,175,950,003 cycles:u # 3.202 GHz (74.93%) + 52,112,697 stalled-cycles-frontend:u # 0.51% frontend cycles idle (75.05%) + 1,283,978,771 stalled-cycles-backend:u # 12.62% backend cycles idle (75.08%) + 19,503,807,353 instructions:u # 1.92 insn per cycle + # 0.07 stalled cycles per insn (75.08%) + 3.179566398 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.001009e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.815270e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.815270e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.939832 sec -INFO: No Floating Point Exceptions have been reported - 10,815,664,939 cycles # 2.742 GHz - 18,691,798,772 instructions # 1.73 insn per cycle - 3.945208768 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.659550e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.188571e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.188571e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.600299 sec -INFO: No Floating Point Exceptions have 
been reported - 9,711,022,403 cycles # 2.112 GHz - 15,432,876,214 instructions # 1.59 insn per cycle - 4.605929662 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 8320028620..6b8c442cf3 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,220 +1,176 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:05:29 +DATE: 2024-05-16_16:31:23 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.845001e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.545626e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.012755e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.945484 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.420460e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.083294e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.310383e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.495814 sec INFO: No Floating Point Exceptions have been reported - 6,182,952,030 cycles # 2.840 GHz - 11,472,065,600 instructions # 1.86 insn per cycle - 2.234065267 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 17,943,418,670 cycles:u # 3.271 GHz (74.94%) + 220,924,204 stalled-cycles-frontend:u # 1.23% frontend cycles idle (74.98%) + 6,860,803,865 stalled-cycles-backend:u # 38.24% backend cycles idle (75.06%) + 16,753,890,275 instructions:u # 0.93 insn per cycle + # 0.41 stalled cycles per insn (75.00%) + 5.547810485 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.053191e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.234940e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.234940e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.394281 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.183732e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.346918e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.346918e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 6.088321 sec INFO: No Floating Point Exceptions have been reported - 18,294,474,856 cycles # 2.859 GHz - 43,971,000,114 instructions # 2.40 insn per cycle - 6.399562206 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) + 19,581,171,404 cycles:u # 3.210 GHz (74.96%) + 50,180,129 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.96%) + 
47,121,604 stalled-cycles-backend:u # 0.24% backend cycles idle (74.96%) + 46,989,064,995 instructions:u # 2.40 insn per cycle + # 0.00 stalled cycles per insn (74.97%) + 6.101947004 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.557791e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.040989e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.040989e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.463787 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.867754e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.359860e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359860e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.119915 sec INFO: No Floating Point Exceptions have been reported - 12,772,778,524 cycles # 2.859 GHz - 30,998,712,334 instructions # 2.43 insn per cycle - 4.469196075 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) + 13,263,190,140 cycles:u # 3.210 GHz (74.95%) + 51,197,022 stalled-cycles-frontend:u # 0.39% frontend cycles idle (75.03%) + 820,166,823 stalled-cycles-backend:u # 6.18% backend cycles idle (75.03%) + 31,165,784,636 instructions:u # 2.35 insn per cycle + # 0.03 stalled cycles per insn (75.03%) + 4.134555430 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.925684e+06 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.680642e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.680642e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.688587 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.630821e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.488220e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.488220e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.129720 sec INFO: No Floating Point Exceptions have been reported - 10,072,112,495 cycles # 2.727 GHz - 19,365,616,714 instructions # 1.92 insn per cycle - 3.694022814 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) + 10,175,967,529 cycles:u # 3.239 GHz (74.89%) + 52,037,851 stalled-cycles-frontend:u # 0.51% frontend cycles idle (75.01%) + 1,289,035,436 stalled-cycles-backend:u # 12.67% backend cycles idle (75.05%) + 19,120,632,315 instructions:u # 1.88 insn per cycle + # 0.07 stalled cycles per insn (75.05%) + 3.143235076 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.999182e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.825411e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.825411e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.565445 sec -INFO: No Floating Point Exceptions have been reported - 9,737,532,909 cycles # 2.728 GHz - 18,976,607,709 instructions # 1.95 insn per cycle - 3.570830090 seconds 
time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.671566e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.197159e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.197159e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.185653 sec -INFO: No Floating Point Exceptions have been 
reported - 8,570,262,444 cycles # 2.045 GHz - 15,727,819,138 instructions # 1.84 insn per cycle - 4.191109662 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 6aa3de3ecf..e28e3db96a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:32:42 +DATE: 2024-05-16_15:54:55 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.832072e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.951586e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.177922e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.666506 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.849422e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.590513e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.911390e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.510534 sec INFO: No Floating Point Exceptions have been reported - 2,570,661,306 cycles # 2.841 GHz - 3,994,547,928 instructions # 1.55 insn per cycle - 0.967516454 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,226,528,626 cycles:u # 2.377 GHz (73.52%) + 2,221,159 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.57%) + 5,251,388 stalled-cycles-backend:u # 0.43% backend cycles idle (75.22%) + 2,084,638,085 instructions:u # 1.70 insn per cycle + # 0.00 stalled cycles per insn (75.35%) + 0.567128402 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165216E-002 +Relative difference = 1.0277079305077159e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.115974e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.322257e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.322257e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.054676 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.294661e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.490725e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.490725e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.604903 sec INFO: No Floating Point Exceptions have been reported - 17,515,565,744 cycles # 2.891 GHz - 41,813,477,100 instructions # 2.39 insn per cycle - 6.059803806 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) + 18,501,031,339 cycles:u # 3.294 GHz (74.96%) + 51,332,716 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.94%) + 844,407,865 stalled-cycles-backend:u # 4.56% backend cycles idle (74.94%) + 44,830,087,800 instructions:u # 2.42 insn per cycle + # 0.02 stalled cycles per insn (74.97%) + 5.619831578 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 486) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.620797e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.138518e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.138518e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 4.301844 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.977593e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.507832e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.507832e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.909334 sec INFO: No Floating Point Exceptions have been reported - 12,450,766,554 cycles # 2.891 GHz - 30,161,114,565 instructions # 2.42 insn per cycle - 4.307292943 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1612) (avx2: 0) (512y: 0) (512z: 0) + 12,867,611,585 cycles:u # 3.283 GHz (74.92%) + 49,300,942 stalled-cycles-frontend:u # 0.38% frontend cycles idle (74.91%) + 1,659,477,790 stalled-cycles-backend:u # 12.90% backend cycles idle (74.93%) + 30,159,312,555 instructions:u # 2.34 insn per cycle + # 0.06 stalled cycles per insn (75.03%) + 3.924169057 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1569) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.953305e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.731201e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.731201e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.641578 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.544143e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.365618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.365618e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.213304 sec INFO: No Floating Point Exceptions have been reported - 9,958,194,708 cycles # 2.732 GHz - 19,097,340,022 instructions # 1.92 insn per cycle - 3.646748223 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1931) (512y: 0) (512z: 0) + 10,293,286,723 cycles:u # 3.193 GHz (74.95%) + 50,260,134 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.94%) + 334,691,875 stalled-cycles-backend:u # 3.25% backend cycles idle (74.95%) + 18,981,075,658 instructions:u # 1.84 insn per cycle + # 0.02 stalled cycles per insn (74.95%) + 3.227163156 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1902) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.030144e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.871070e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.871070e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.516336 sec -INFO: No Floating Point Exceptions have been reported - 9,615,329,857 cycles # 2.731 GHz - 18,757,197,169 instructions # 1.95 insn per cycle - 3.521486960 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1662) (512y: 178) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.716452e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.276447e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.276447e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.085042 sec -INFO: No Floating Point Exceptions have been reported - 8,419,965,935 cycles # 2.059 GHz - 15,604,092,420 instructions # 1.85 insn per cycle - 
4.090208290 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 887) (512y: 156) (512z: 1239) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index b8b45776b1..7c1de5f530 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 
1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:50:17 +DATE: 2024-05-16_16:14:10 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.702175e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.710921e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.156854e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.694390 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.180573e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.087080e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.343874e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.510647 sec INFO: No Floating Point Exceptions have been reported - 2,605,646,002 cycles # 2.805 GHz - 4,054,878,448 instructions # 1.56 insn per cycle - 0.990293563 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,317,606,045 cycles:u # 2.555 GHz (75.14%) + 2,106,656 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.24%) + 5,011,590 stalled-cycles-backend:u # 0.38% backend cycles idle (75.33%) + 2,061,741,334 instructions:u # 1.56 insn per cycle + # 0.00 stalled cycles per insn (75.40%) + 0.562010535 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.570616e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.013848e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.013848e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.427060 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.790157e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.173751e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.173751e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.241298 sec INFO: No Floating Point Exceptions have been reported - 12,653,335,495 cycles # 2.855 GHz - 32,508,582,789 instructions # 2.57 insn per cycle - 4.432506316 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) + 14,122,804,467 cycles:u # 3.322 GHz (75.02%) + 52,780,179 stalled-cycles-frontend:u # 0.37% frontend cycles idle (74.98%) + 742,554,443 stalled-cycles-backend:u # 5.26% backend cycles idle (74.98%) + 36,765,556,155 instructions:u # 2.60 insn per cycle + # 0.02 stalled cycles per insn (74.98%) + 4.255166528 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.997942e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.867798e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.867798e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.572733 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.354973e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.172546e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.172546e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.404789 sec INFO: No Floating Point Exceptions have been reported - 10,224,022,422 cycles # 2.858 GHz - 24,474,305,392 instructions # 2.39 insn per cycle - 3.578147466 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) + 10,939,804,848 cycles:u # 3.203 GHz (74.96%) + 52,338,890 stalled-cycles-frontend:u # 0.48% frontend cycles idle (74.94%) + 156,607,813 stalled-cycles-backend:u # 1.43% backend cycles idle (74.95%) + 24,783,195,117 instructions:u # 2.27 insn per cycle + # 0.01 stalled cycles per insn (74.95%) + 3.419320010 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2334) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.166030e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.174429e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.174429e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.325864 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.029032e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.216078e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.216078e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.801238 sec INFO: No Floating Point Exceptions have been reported - 9,098,194,590 cycles # 2.732 GHz - 16,922,780,551 instructions # 1.86 insn per cycle - 3.331363940 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) + 9,086,876,579 cycles:u # 3.231 GHz (74.97%) + 49,992,624 stalled-cycles-frontend:u # 0.55% frontend cycles idle (74.97%) + 136,988,259 stalled-cycles-backend:u # 1.51% backend cycles idle (74.98%) + 16,857,366,216 instructions:u # 1.86 insn per cycle + # 0.01 stalled cycles per insn (74.98%) + 2.832994199 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1604) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.212975e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.265291e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.265291e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.263243 sec -INFO: No Floating Point Exceptions have been reported - 8,899,581,855 cycles # 2.723 GHz - 16,332,700,862 instructions # 1.84 insn per cycle - 3.268811314 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.881513e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.573087e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.573087e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.766130 sec -INFO: No Floating Point Exceptions have been reported - 
7,870,004,063 cycles # 2.087 GHz - 14,582,523,760 instructions # 1.85 insn per cycle - 3.771527980 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 36ca3a055a..7bd614f453 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:50:44 +DATE: 2024-05-16_16:14:26 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.703728e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.728666e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.213805e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.683308 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.651323e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.584556e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.905916e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.514513 sec INFO: No Floating Point Exceptions have been reported - 2,600,218,993 cycles # 2.827 GHz - 4,020,842,023 instructions # 1.55 insn per cycle - 0.979103636 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,280,503,839 cycles:u # 2.435 GHz (73.24%) + 2,277,448 stalled-cycles-frontend:u # 0.18% frontend cycles idle (73.70%) + 5,137,824 stalled-cycles-backend:u # 0.40% backend cycles idle (75.39%) + 2,023,880,849 instructions:u # 1.58 insn per cycle + # 0.00 stalled cycles per insn (75.82%) + 0.570351573 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165216E-002 +Relative difference = 1.0277079305077159e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.089739e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.950650e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.950650e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.429960 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.400394e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.175393e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.175393e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.357588 sec INFO: No Floating Point Exceptions have been reported - 9,811,818,087 cycles # 2.857 GHz - 25,388,363,151 instructions # 2.59 insn per cycle - 3.435408237 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) + 10,773,216,593 cycles:u # 3.199 GHz (74.90%) + 50,692,552 stalled-cycles-frontend:u # 0.47% frontend cycles idle (75.02%) + 49,096,202 stalled-cycles-backend:u # 0.46% backend cycles idle (75.06%) + 28,434,930,563 instructions:u # 2.64 insn per cycle + # 0.00 stalled cycles per insn (75.06%) + 3.373586068 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.308707e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.576242e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.576242e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.150608 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.618000e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.644001e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.644001e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.133129 sec INFO: No Floating Point Exceptions have been reported - 9,017,040,298 cycles # 2.858 GHz - 21,483,572,468 instructions # 2.38 insn per cycle - 3.156151233 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) + 10,198,671,312 cycles:u # 3.245 GHz (74.90%) + 57,108,535 stalled-cycles-frontend:u # 0.56% frontend cycles idle (74.91%) + 61,941,326 stalled-cycles-backend:u # 0.61% backend cycles idle (75.03%) + 21,473,197,144 instructions:u # 2.11 insn per cycle + # 0.00 stalled cycles per insn (75.06%) + 3.174997823 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2117) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.317514e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.502934e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.502934e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.137476 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.243324e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.693698e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.693698e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.677364 sec INFO: No Floating Point Exceptions have been reported - 8,591,075,632 cycles # 2.734 GHz - 15,811,134,800 instructions # 1.84 insn per cycle - 3.143116597 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) + 8,473,678,842 cycles:u # 3.153 GHz (75.00%) + 49,686,047 stalled-cycles-frontend:u # 0.59% frontend cycles idle (75.00%) + 65,234,637 stalled-cycles-backend:u # 0.77% backend cycles idle (75.00%) + 15,802,097,675 instructions:u # 1.86 insn per cycle + # 0.00 stalled cycles per insn (75.00%) + 2.690233127 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165084E-002 +Relative difference = 1.0277089582483854e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.364262e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.604788e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.604788e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.081738 sec -INFO: No Floating Point Exceptions have been reported - 8,442,051,612 cycles # 2.735 GHz - 15,504,513,991 instructions # 1.84 insn per cycle - 3.087247738 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.995819e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.803385e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.803385e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.575599 sec -INFO: No Floating Point Exceptions have been reported - 
7,560,717,738 cycles # 2.112 GHz - 14,283,918,013 instructions # 1.89 insn per cycle - 3.581217674 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index e6a48e18ea..947ead6bb7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:33:12 +DATE: 2024-05-16_15:55:13 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.602023e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319974e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.288580e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.573413 sec +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.880048e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.082152e+08 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 6.804795e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 +TOTAL : 0.398295 sec INFO: No Floating Point Exceptions have been reported - 2,248,961,780 cycles # 2.823 GHz - 3,510,545,687 instructions # 1.56 insn per cycle - 0.854969378 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 911,414,092 cycles:u # 2.228 GHz (75.56%) + 2,219,034 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.94%) + 5,292,087 stalled-cycles-backend:u # 0.58% backend cycles idle (75.81%) + 1,916,223,650 instructions:u # 2.10 insn per cycle + # 0.00 stalled cycles per insn (75.59%) + 0.447965198 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036033170065E-002 +Relative difference = 1.2498553996774023e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.082006e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.283429e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.283429e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.191643 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.395142e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.620561e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.620561e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 5.208988 sec INFO: No Floating Point Exceptions have been reported - 17,740,084,916 cycles # 2.864 GHz - 43,510,870,904 instructions # 2.45 insn per cycle - 6.196630799 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 17,164,934,882 cycles:u # 3.291 GHz (75.00%) + 39,709,044 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.00%) + 32,772,868 stalled-cycles-backend:u # 0.19% backend cycles idle (75.00%) + 47,103,841,077 instructions:u # 2.74 insn per cycle + # 0.00 stalled cycles per insn (75.00%) + 5.264288529 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.223628e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.400190e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.400190e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.211334 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.889573e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.119819e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.119819e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.857430 sec INFO: No Floating Point Exceptions have been reported - 9,244,093,548 cycles # 2.875 GHz - 21,907,620,538 instructions # 2.37 insn per cycle - 3.216400901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,161,999,343 cycles:u # 3.198 GHz (74.88%) + 41,164,438 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.88%) + 917,958,407 stalled-cycles-backend:u # 10.02% backend cycles idle (75.01%) + 22,149,269,363 instructions:u # 2.42 insn per cycle + # 0.04 stalled cycles per insn (75.15%) + 2.885413922 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.419643e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.716778e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.716778e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.967957 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.345201e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.882374e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.882374e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.561943 sec INFO: No Floating Point Exceptions have been reported - 8,316,472,651 cycles # 2.798 GHz - 15,592,546,873 instructions # 1.87 insn per cycle - 2.972957823 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) + 8,174,925,567 cycles:u # 3.180 GHz (74.72%) + 44,207,526 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.98%) + 1,253,263,119 stalled-cycles-backend:u # 15.33% backend cycles idle (75.10%) + 15,464,275,545 instructions:u # 1.89 insn per cycle + # 0.08 stalled cycles per insn (75.10%) + 2.576121476 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.415944e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.716005e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.716005e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.973842 sec -INFO: No Floating Point Exceptions have been reported - 8,315,082,395 cycles # 2.793 GHz - 15,436,266,122 instructions # 1.86 insn per cycle - 2.978955673 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.416813e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.677411e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.677411e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.974949 sec -INFO: No Floating Point Exceptions have been reported - 
6,610,937,423 cycles # 2.219 GHz - 12,863,752,208 instructions # 1.95 insn per cycle - 2.980091060 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 29c604a610..1b97ebb2e9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,231 +1,182 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:00:26 +DATE: 2024-05-16_16:26:50 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.941162e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.420865e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.420865e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.745821 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.419416e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.312981e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.312981e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 +TOTAL : 5.477791 sec INFO: No Floating Point Exceptions have been reported - 5,608,708,868 cycles # 2.838 GHz - 10,190,752,473 instructions # 1.82 insn per cycle - 2.033892232 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 17,800,579,188 cycles:u # 3.236 GHz (74.97%) + 120,081,643 stalled-cycles-frontend:u # 0.67% frontend cycles idle (75.00%) + 6,869,674,283 stalled-cycles-backend:u # 38.59% backend cycles idle (75.00%) + 17,127,606,895 instructions:u # 0.96 insn per cycle + # 0.40 stalled cycles per insn (75.03%) + 5.530316842 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036033170065E-002 +Relative difference = 1.2498553996774023e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.055357e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.251163e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.251163e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.442554 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.372295e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.590791e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.590791e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 5.346442 sec INFO: No Floating Point Exceptions have been reported - 18,431,262,224 cycles # 2.859 GHz - 43,659,496,470 instructions # 2.37 insn per cycle - 6.448903506 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 17,427,678,130 cycles:u # 3.252 GHz (74.94%) + 39,859,631 
stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.93%) + 67,838,113 stalled-cycles-backend:u # 0.39% backend cycles idle (74.96%) + 47,375,316,725 instructions:u # 2.72 insn per cycle + # 0.00 stalled cycles per insn (75.03%) + 5.362445974 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.110619e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.160777e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.160777e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.492633 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.753542e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.867887e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.867887e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 3.053973 sec INFO: No Floating Point Exceptions have been reported - 9,993,500,583 cycles # 2.857 GHz - 23,243,476,984 instructions # 2.33 insn per cycle - 3.498991107 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,572,820,101 cycles:u # 3.122 GHz (74.95%) + 42,610,043 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.96%) + 957,509,627 stalled-cycles-backend:u # 10.00% backend cycles idle (74.96%) + 23,541,807,394 instructions:u # 2.46 insn per cycle + # 0.04 stalled cycles per insn (74.95%) + 3.069040228 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.275264e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.392372e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.392372e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.272099 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.187475e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.621381e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.621381e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.755362 sec INFO: No Floating Point Exceptions have been reported - 9,005,707,266 cycles # 2.748 GHz - 16,711,349,389 instructions # 1.86 insn per cycle - 3.278668519 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) + 8,527,609,479 cycles:u # 3.081 GHz (74.87%) + 44,341,079 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.86%) + 1,278,366,033 stalled-cycles-backend:u # 14.99% backend cycles idle (74.92%) + 16,646,340,310 instructions:u # 1.95 insn per cycle + # 0.08 stalled cycles per insn (75.07%) + 2.770557791 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.297831e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.443567e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.443567e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.244587 sec -INFO: No Floating Point Exceptions have been reported - 8,928,752,660 cycles # 2.747 GHz - 16,549,135,089 instructions # 1.85 insn per cycle - 3.250993607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.241559e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.306249e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.306249e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.324488 sec -INFO: No Floating Point Exceptions have been reported - 7,378,511,382 cycles # 2.216 GHz - 14,071,008,703 instructions # 1.91 insn per cycle - 3.330848983 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 8016aaf3c8..fdebe20463 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:11:30 +DATE: 2024-05-16_16:33:58 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.314779e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.179276e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.254245e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.213916 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.184024e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.064173e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.779084e+08 ) 
sec^-1 +MeanMatrixElemValue = ( 1.371895e-02 +- 3.272985e-06 ) GeV^0 +TOTAL : 4.708045 sec INFO: No Floating Point Exceptions have been reported - 4,089,914,869 cycles # 2.847 GHz - 6,594,462,327 instructions # 1.61 insn per cycle - 1.494122889 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 14,980,300,908 cycles:u # 3.189 GHz (74.98%) + 53,407,838 stalled-cycles-frontend:u # 0.36% frontend cycles idle (75.04%) + 6,811,892,378 stalled-cycles-backend:u # 45.47% backend cycles idle (75.04%) + 11,342,919,447 instructions:u # 0.76 insn per cycle + # 0.60 stalled cycles per insn (74.96%) + 4.758589849 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036033170065E-002 +Relative difference = 1.2498553996774023e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.089350e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.292539e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.292539e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.394442e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.621360e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621360e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.490729 sec +TOTAL : 5.212454 sec INFO: No Floating Point Exceptions have been reported - 18,740,706,935 cycles # 2.886 GHz - 43,689,321,367 instructions # 2.33 insn per cycle - 6.495941000 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 17,164,329,028 cycles:u # 3.288 GHz (75.02%) + 39,152,890 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.02%) + 32,697,264 stalled-cycles-backend:u # 0.19% backend cycles idle (75.02%) + 47,185,834,568 instructions:u # 2.75 insn per cycle + # 0.00 stalled cycles per insn (75.02%) + 5.221195761 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.213519e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.407340e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.407340e+06 ) sec^-1 +OMP 
threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.933527e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.182670e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.182670e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.565494 sec +TOTAL : 2.825501 sec INFO: No Floating Point Exceptions have been reported - 10,288,737,724 cycles # 2.883 GHz - 21,988,558,280 instructions # 2.14 insn per cycle - 3.570732391 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,155,103,067 cycles:u # 3.232 GHz (74.87%) + 42,040,255 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.88%) + 918,495,192 stalled-cycles-backend:u # 10.03% backend cycles idle (74.88%) + 22,182,387,469 instructions:u # 2.42 insn per cycle + # 0.04 stalled cycles per insn (75.01%) + 2.836069891 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.392103e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.656361e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.656361e+06 ) sec^-1 +OMP 
threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.425850e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.022951e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.022951e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.347045 sec +TOTAL : 2.509275 sec INFO: No Floating Point Exceptions have been reported - 9,294,224,919 cycles # 2.774 GHz - 15,502,535,760 instructions # 1.67 insn per cycle - 3.352354405 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) + 8,126,245,218 cycles:u # 3.229 GHz (74.89%) + 43,460,542 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.90%) + 1,270,506,684 stalled-cycles-backend:u # 15.63% backend cycles idle (74.90%) + 15,524,237,487 instructions:u # 1.91 insn per cycle + # 0.08 stalled cycles per insn (75.01%) + 2.517716050 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.411853e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.706271e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.706271e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.334245 sec -INFO: No Floating Point Exceptions have been reported - 9,229,877,586 cycles # 2.765 GHz - 15,144,508,612 instructions # 1.64 insn per cycle - 3.339505215 seconds 
time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.389642e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.623022e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.623022e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.358884 sec -INFO: No Floating Point Exceptions have been 
reported - 7,623,474,420 cycles # 2.266 GHz - 12,573,351,599 instructions # 1.65 insn per cycle - 3.364654068 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index ef6806658f..35ec3b599c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,220 +1,176 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:06:01 +DATE: 2024-05-16_16:31:47 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.717298e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.145539e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.143407e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.533112 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.191751e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.918687e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.609131e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 +TOTAL : 5.314497 sec INFO: No Floating Point Exceptions have been reported - 4,980,418,158 cycles # 2.839 GHz - 9,119,342,139 instructions # 1.83 insn per cycle - 1.812784805 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 17,580,992,681 cycles:u # 3.298 GHz (74.94%) + 119,557,715 stalled-cycles-frontend:u # 0.68% frontend cycles idle (74.94%) + 6,756,513,568 stalled-cycles-backend:u # 38.43% backend cycles idle (74.97%) + 16,772,724,997 instructions:u # 0.95 insn per cycle + # 0.40 stalled cycles per insn (75.08%) + 5.365969775 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036033170065E-002 +Relative difference = 1.2498553996774023e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.069115e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.265540e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.265540e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.262605 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.417462e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.646807e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.646807e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 5.127587 sec INFO: No Floating Point Exceptions have been reported - 17,921,464,120 cycles # 2.860 GHz - 43,508,155,770 instructions # 2.43 insn per cycle - 6.267871711 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) + 17,233,558,516 cycles:u # 3.356 GHz (74.93%) + 40,280,100 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.92%) + 
35,081,862 stalled-cycles-backend:u # 0.20% backend cycles idle (74.94%) + 47,162,582,218 instructions:u # 2.74 insn per cycle + # 0.00 stalled cycles per insn (75.01%) + 5.136764939 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.205993e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.384022e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.384022e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.237394 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.856364e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.058154e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.058154e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.890363 sec INFO: No Floating Point Exceptions have been reported - 9,270,351,681 cycles # 2.860 GHz - 21,907,147,046 instructions # 2.36 insn per cycle - 3.242634203 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) + 9,245,390,179 cycles:u # 3.190 GHz (74.88%) + 42,576,371 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.88%) + 951,286,364 stalled-cycles-backend:u # 10.29% backend cycles idle (74.87%) + 22,150,825,461 instructions:u # 2.40 insn per cycle + # 0.04 stalled cycles per insn (74.96%) + 2.899299092 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.379256e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.625966e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.625966e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.019248 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.368969e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.936597e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.936597e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.551005 sec INFO: No Floating Point Exceptions have been reported - 8,297,873,717 cycles # 2.744 GHz - 15,590,905,283 instructions # 1.88 insn per cycle - 3.024598202 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) + 8,148,223,721 cycles:u # 3.185 GHz (74.99%) + 43,084,446 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.98%) + 1,255,704,336 stalled-cycles-backend:u # 15.41% backend cycles idle (74.98%) + 15,486,815,293 instructions:u # 1.90 insn per cycle + # 0.08 stalled cycles per insn (74.98%) + 2.559787773 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.398987e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.671948e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.671948e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.000824 sec -INFO: No Floating Point Exceptions have been reported - 8,223,361,227 cycles # 2.736 GHz - 15,434,590,756 instructions # 1.88 insn per cycle - 3.006023707 seconds 
time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.360241e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.570739e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.570739e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.046199 sec -INFO: No Floating Point Exceptions have been 
reported - 6,615,724,908 cycles # 2.169 GHz - 12,863,710,849 instructions # 1.94 insn per cycle - 3.051492012 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index b613786442..c03e7d3f34 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:33:39 +DATE: 2024-05-16_15:55:29 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.604585e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.336536e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.343718e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.571057 sec +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.912828e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.116110e+08 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 6.852680e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 +TOTAL : 0.398467 sec INFO: No Floating Point Exceptions have been reported - 2,242,744,669 cycles # 2.822 GHz - 3,531,920,926 instructions # 1.57 insn per cycle - 0.851832101 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 954,128,617 cycles:u # 2.309 GHz (74.47%) + 2,180,585 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.29%) + 4,555,761 stalled-cycles-backend:u # 0.48% backend cycles idle (74.06%) + 1,856,624,951 instructions:u # 1.95 insn per cycle + # 0.00 stalled cycles per insn (74.63%) + 0.447288567 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036033170065E-002 +Relative difference = 1.2498553996774023e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.153085e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.384746e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.384746e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.830325 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.492433e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.754217e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.754217e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.901236 sec INFO: No Floating Point Exceptions have been reported - 16,691,813,815 cycles # 2.861 GHz - 41,266,181,474 instructions # 2.47 insn per cycle - 5.835359179 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) + 15,963,575,794 cycles:u # 3.252 GHz (74.94%) + 40,480,511 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.92%) + 27,696,228 stalled-cycles-backend:u # 0.17% backend cycles idle (74.93%) + 44,027,177,404 instructions:u # 2.76 insn per cycle + # 0.00 stalled cycles per insn (75.00%) + 4.912881175 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 467) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.304367e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.587722e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.587722e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.109672 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.003327e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.314277e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.314277e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.782850 sec INFO: No Floating Point Exceptions have been reported - 8,995,426,679 cycles # 2.889 GHz - 21,211,089,826 instructions # 2.36 insn per cycle - 3.114839321 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1843) (avx2: 0) (512y: 0) (512z: 0) + 8,914,677,636 cycles:u # 3.194 GHz (75.00%) + 40,778,600 stalled-cycles-frontend:u # 0.46% frontend cycles idle (75.06%) + 965,278,963 stalled-cycles-backend:u # 10.83% backend cycles idle (75.06%) + 21,627,615,188 instructions:u # 2.43 insn per cycle + # 0.04 stalled cycles per insn (75.07%) + 2.813249066 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1827) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.420820e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.716893e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.716893e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.969606 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.412090e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.030398e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.030398e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.519419 sec INFO: No Floating Point Exceptions have been reported - 8,272,952,138 cycles # 2.782 GHz - 15,425,102,157 instructions # 1.86 insn per cycle - 2.974640700 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2537) (512y: 0) (512z: 0) + 8,052,770,656 cycles:u # 3.187 GHz (74.99%) + 43,661,657 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.99%) + 1,618,780,668 stalled-cycles-backend:u # 20.10% backend cycles idle (74.99%) + 15,326,595,516 instructions:u # 1.90 insn per cycle + # 0.11 stalled cycles per insn (74.99%) + 2.529617963 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2542) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.475869e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.854556e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.854556e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.912175 sec -INFO: No Floating Point Exceptions have been reported - 8,117,590,540 cycles # 2.783 GHz - 15,233,342,033 instructions # 1.88 insn per cycle - 2.917189383 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.412788e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.668874e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.668874e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.982256 sec -INFO: No Floating Point Exceptions have been reported - 
6,592,409,084 cycles # 2.208 GHz - 12,843,659,599 instructions # 1.95 insn per cycle - 2.987368722 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1706) (512y: 18) (512z: 1427) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052564145764E-002 -Relative difference = 1.9988585667912256e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index e6d46e5965..ce1e039a32 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:51:09 +DATE: 2024-05-16_16:14:41 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.307514e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.195447e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.293637e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.586214 sec +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.636778e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.079000e+08 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 6.802981e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 +TOTAL : 0.418904 sec INFO: No Floating Point Exceptions have been reported - 2,288,759,129 cycles # 2.822 GHz - 3,563,945,826 instructions # 1.56 insn per cycle - 0.869586754 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 964,065,439 cycles:u # 2.411 GHz (74.02%) + 2,055,912 stalled-cycles-frontend:u # 0.21% frontend cycles idle (73.92%) + 4,248,343 stalled-cycles-backend:u # 0.44% backend cycles idle (76.06%) + 1,780,527,270 instructions:u # 1.85 insn per cycle + # 0.00 stalled cycles per insn (76.85%) + 0.469503541 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036033170065E-002 +Relative difference = 1.2498553996774023e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.594420e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.075669e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.075669e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 4.325231 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.895282e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.334516e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.334516e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 3.998723 sec INFO: No Floating Point Exceptions have been reported - 12,164,411,956 cycles # 2.810 GHz - 32,427,707,417 instructions # 2.67 insn per cycle - 4.330470336 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) + 13,090,879,494 cycles:u # 3.268 GHz (74.95%) + 38,767,383 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.04%) + 829,920,546 stalled-cycles-backend:u # 6.34% backend cycles idle (75.04%) + 37,991,928,680 instructions:u # 2.90 insn per cycle + # 0.02 stalled cycles per insn (75.05%) + 4.010067746 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 833) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039840314887E-002 -Relative difference = 1.244813035273009e-08 +Avg ME (F77/C++) = 1.2828039543819614E-002 +Relative difference = 3.5561191488957804e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.607791e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.429113e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.429113e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.796823 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.520980e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.457619e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.457619e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.459682 sec INFO: No Floating Point Exceptions have been reported - 8,009,111,157 cycles # 2.859 GHz - 18,657,618,704 instructions # 2.33 insn per cycle - 2.802139139 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1555) (avx2: 0) (512y: 0) (512z: 0) + 7,921,061,132 cycles:u # 3.211 GHz (75.03%) + 42,543,698 stalled-cycles-frontend:u # 0.54% frontend cycles idle (75.03%) + 559,552,662 stalled-cycles-backend:u # 7.06% backend cycles idle (75.03%) + 18,622,170,728 instructions:u # 2.35 insn per cycle + # 0.03 stalled cycles per insn (75.03%) + 2.472156987 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2808) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039283704129E-002 -Relative difference = 5.583829420356249e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.719759e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.472647e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.472647e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.690741 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.743858e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.832207e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.832207e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.363179 sec INFO: No Floating Point Exceptions have been reported - 7,416,864,109 cycles # 2.752 GHz - 14,251,974,045 instructions # 1.92 insn per cycle - 2.696083346 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) + 7,395,191,570 cycles:u # 3.120 GHz (75.00%) + 44,075,937 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.03%) + 953,996,053 stalled-cycles-backend:u # 12.90% backend cycles idle (75.03%) + 14,331,799,005 instructions:u # 1.94 insn per cycle + # 0.07 stalled cycles per insn (75.03%) + 2.373336623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053244447801E-002 -Relative difference = 2.5291823782248813e-07 +Avg ME (F77/C++) = 1.2828053337216261E-002 +Relative difference = 2.601499261602198e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.774877e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.630440e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.630440e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.645076 sec -INFO: No Floating Point Exceptions have been reported - 7,291,130,406 cycles # 2.752 GHz - 13,948,384,567 instructions # 1.91 insn per cycle - 2.650598467 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053244447801E-002 -Relative difference = 2.5291823782248813e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.434115e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.741003e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.741003e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.962177 sec -INFO: No Floating Point Exceptions have been reported - 
6,479,327,720 cycles # 2.184 GHz - 13,423,401,797 instructions # 2.07 insn per cycle - 2.967420151 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052562326775E-002 -Relative difference = 1.997440588685788e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 1851f3246c..ad9c8830ac 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:51:33 +DATE: 2024-05-16_16:14:55 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.308899e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.200904e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.329787e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.585570 sec +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.851046e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.118410e+08 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 6.853046e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 +TOTAL : 0.385599 sec INFO: No Floating Point Exceptions have been reported - 2,293,480,451 cycles # 2.820 GHz - 3,552,176,680 instructions # 1.55 insn per cycle - 0.870100804 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 944,031,209 cycles:u # 2.364 GHz (74.16%) + 2,116,405 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.55%) + 5,084,891 stalled-cycles-backend:u # 0.54% backend cycles idle (76.50%) + 1,787,046,210 instructions:u # 1.89 insn per cycle + # 0.00 stalled cycles per insn (76.13%) + 0.436387294 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036033170065E-002 +Relative difference = 1.2498553996774023e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.129885e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.080551e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.080551e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.330346 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.660871e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.598902e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.598902e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 3.041997 sec INFO: No Floating Point Exceptions have been reported - 9,425,530,261 cycles # 2.826 GHz - 25,263,309,757 instructions # 2.68 insn per cycle - 3.335509619 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) + 9,928,178,075 cycles:u # 3.255 GHz (74.85%) + 38,803,000 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.97%) + 19,233,961 stalled-cycles-backend:u # 0.19% backend cycles idle (75.09%) + 28,511,786,830 instructions:u # 2.87 insn per cycle + # 0.00 stalled cycles per insn (75.09%) + 3.053384571 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 632) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039838495897E-002 -Relative difference = 1.2589928273811243e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.953227e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.493970e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.493970e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.514743 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.871089e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.300213e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.300213e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.294096 sec INFO: No Floating Point Exceptions have been reported - 7,199,101,915 cycles # 2.858 GHz - 16,870,111,415 instructions # 2.34 insn per cycle - 2.520226033 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) + 7,395,021,784 cycles:u # 3.212 GHz (75.01%) + 41,885,141 stalled-cycles-frontend:u # 0.57% frontend cycles idle (74.98%) + 32,181,008 stalled-cycles-backend:u # 0.44% backend cycles idle (74.98%) + 16,782,482,676 instructions:u # 2.27 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 2.305228466 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.869871e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.903620e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.903620e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.568153 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.019915e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.467727e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.467727e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.246505 sec INFO: No Floating Point Exceptions have been reported - 7,089,400,745 cycles # 2.756 GHz - 13,616,924,187 instructions # 1.92 insn per cycle - 2.573571442 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) + 7,140,857,801 cycles:u # 3.168 GHz (74.81%) + 41,858,018 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.89%) + 382,399,808 stalled-cycles-backend:u # 5.36% backend cycles idle (75.06%) + 13,531,297,023 instructions:u # 1.89 insn per cycle + # 0.03 stalled cycles per insn (75.16%) + 2.257437630 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2082) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053220800939E-002 -Relative difference = 2.5107486628541925e-07 +Avg ME (F77/C++) = 1.2828053331759293E-002 +Relative difference = 2.597245327285885e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.911116e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.034757e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.034757e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.534380 sec -INFO: No Floating Point Exceptions have been reported - 7,042,060,221 cycles # 2.774 GHz - 13,426,671,587 instructions # 1.91 insn per cycle - 2.539847169 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053220800939E-002 -Relative difference = 2.5107486628541925e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.525985e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.967300e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.967300e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.871748 sec -INFO: No Floating Point Exceptions have been reported - 
6,325,625,286 cycles # 2.199 GHz - 13,154,721,049 instructions # 2.08 insn per cycle - 2.877120825 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052536860923E-002 -Relative difference = 1.977588895209662e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index b626a014f8..2c4ee4866d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:34:05 +DATE: 2024-05-16_15:55:44 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.830025e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.944832e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.160865e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.662041 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.236120e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.113245e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.341425e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.521510 sec INFO: No Floating Point Exceptions have been reported - 2,559,219,510 cycles # 2.861 GHz - 3,969,506,530 instructions # 1.55 insn per cycle - 0.952802853 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,325,273,277 cycles:u # 2.504 GHz (74.48%) + 2,275,284 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.52%) + 5,594,164 stalled-cycles-backend:u # 0.42% backend cycles idle (74.33%) + 2,060,185,234 instructions:u # 1.55 insn per cycle + # 0.00 stalled cycles per insn (74.50%) + 0.574291100 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 +Avg ME (F77/GPU) = 1.2828039901590281E-002 +Relative difference = 7.67145406542181e-09 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.044346e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.221236e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.221236e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.439706 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.211934e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.383197e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.383197e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.931262 sec INFO: No Floating Point Exceptions have been reported - 18,648,827,254 cycles # 2.894 GHz - 44,218,351,924 instructions # 2.37 insn per cycle - 6.444755062 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) + 19,577,391,797 cycles:u # 3.295 GHz (74.96%) + 51,699,483 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.96%) + 49,221,652 stalled-cycles-backend:u # 0.25% backend cycles idle (74.96%) + 47,079,701,413 instructions:u # 2.40 insn per cycle + # 0.00 stalled cycles per insn (74.97%) + 5.945284341 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 474) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.634240e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.158489e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.158489e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 4.270020 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.943982e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.472995e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.472995e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.978932 sec INFO: No Floating Point Exceptions have been reported - 12,337,216,169 cycles # 2.886 GHz - 30,918,100,190 instructions # 2.51 insn per cycle - 4.275170664 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) + 12,946,839,351 cycles:u # 3.245 GHz (74.96%) + 50,834,055 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.94%) + 2,028,242,845 stalled-cycles-backend:u # 15.67% backend cycles idle (74.95%) + 30,986,816,206 instructions:u # 2.39 insn per cycle + # 0.07 stalled cycles per insn (74.95%) + 3.994417915 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1667) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.943703e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.696046e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.696046e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.651391 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.584797e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.409726e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.409726e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.161946 sec INFO: No Floating Point Exceptions have been reported - 10,097,284,751 cycles # 2.762 GHz - 19,374,074,587 instructions # 1.92 insn per cycle - 3.656592402 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2130) (512y: 0) (512z: 0) + 10,298,446,422 cycles:u # 3.244 GHz (75.03%) + 49,424,298 stalled-cycles-frontend:u # 0.48% frontend cycles idle (75.05%) + 711,524,247 stalled-cycles-backend:u # 6.91% backend cycles idle (75.05%) + 19,483,845,697 instructions:u # 1.89 insn per cycle + # 0.04 stalled cycles per insn (75.05%) + 3.237591258 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2119) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.039225e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.880994e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.880994e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.498933 sec -INFO: No Floating Point Exceptions have been reported - 9,699,890,764 cycles # 2.769 GHz - 18,944,296,026 instructions # 1.95 insn per cycle - 3.504313379 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1860) (512y: 188) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.766168e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.359450e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.359450e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.977432 sec -INFO: No Floating Point Exceptions have been reported - 8,362,626,878 cycles # 2.101 GHz - 15,058,722,791 instructions # 1.80 insn per cycle - 
3.982532855 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1024) (512y: 155) (512z: 1316) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index f9780717c1..9ee3035b08 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 
1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_14:34:35 +DATE: 2024-05-16_15:56:03 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.831074e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.944999e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.163112e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.663702 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.914851e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.589977e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.912358e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.487955 sec INFO: No Floating Point Exceptions have been reported - 2,550,713,530 cycles # 2.845 GHz - 3,995,712,636 instructions # 1.57 insn per cycle - 0.958037940 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,256,988,238 cycles:u # 2.486 GHz (75.00%) + 2,100,692 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.65%) + 4,451,453 stalled-cycles-backend:u # 0.35% backend cycles idle (74.64%) + 2,022,295,986 instructions:u # 1.61 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 0.540038021 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 +Avg ME (F77/GPU) = 1.2828039901590284E-002 +Relative difference = 7.67145379496374e-09 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.088480e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.281697e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.281697e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.193535 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.313434e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.511799e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.511799e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.518687 sec INFO: No Floating Point Exceptions have been reported - 17,967,058,694 cycles # 2.899 GHz - 42,467,805,223 instructions # 2.36 insn per cycle - 6.198684795 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) + 18,598,926,650 cycles:u # 3.363 GHz (74.98%) + 51,474,971 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.97%) + 37,621,317 stalled-cycles-backend:u # 0.20% backend cycles idle (74.90%) + 44,780,680,502 instructions:u # 2.41 insn per cycle + # 0.00 stalled cycles per insn (74.91%) + 5.533021930 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 498) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.676284e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.231904e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.231904e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 4.170684 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.026452e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.587298e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.587298e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.830226 sec INFO: No Floating Point Exceptions have been reported - 12,134,694,075 cycles # 2.906 GHz - 30,224,929,059 instructions # 2.49 insn per cycle - 4.175943490 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) + 12,685,903,200 cycles:u # 3.302 GHz (75.01%) + 52,800,557 stalled-cycles-frontend:u # 0.42% frontend cycles idle (75.01%) + 1,017,329,956 stalled-cycles-backend:u # 8.02% backend cycles idle (75.01%) + 30,187,527,006 instructions:u # 2.38 insn per cycle + # 0.03 stalled cycles per insn (75.01%) + 3.845010638 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1650) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.950812e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.735198e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.735198e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 
3.270315e-06 ) GeV^0 -TOTAL : 3.639278 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.618417e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.462454e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.462454e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.131904 sec INFO: No Floating Point Exceptions have been reported - 10,078,657,444 cycles # 2.766 GHz - 19,257,126,653 instructions # 1.91 insn per cycle - 3.644365244 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) + 10,285,843,771 cycles:u # 3.272 GHz (74.84%) + 55,711,094 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.93%) + 243,673,079 stalled-cycles-backend:u # 2.37% backend cycles idle (75.06%) + 19,016,668,706 instructions:u # 1.85 insn per cycle + # 0.01 stalled cycles per insn (75.06%) + 3.157506078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2072) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.049769e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.898049e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.898049e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.481211 sec -INFO: No Floating Point Exceptions have been reported - 9,647,917,970 cycles # 2.768 GHz - 18,746,418,128 instructions # 1.94 insn per cycle - 3.486360008 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1834) (512y: 191) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.796433e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.409552e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.409552e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.919050 sec -INFO: No Floating Point Exceptions have been reported - 8,244,471,456 cycles # 2.102 GHz - 14,980,246,059 instructions # 1.82 insn per cycle - 
3.924194596 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1021) (512y: 156) (512z: 1305) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 205a4bf5b6..5633007fba 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:35:05 +DATE: 2024-05-16_15:56:20 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.201162e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.181610e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.277713e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.525559 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.874005e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.955473e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.009544e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) 
GeV^0 +TOTAL : 0.382065 sec INFO: No Floating Point Exceptions have been reported - 2,155,305,398 cycles # 2.849 GHz - 3,120,666,963 instructions # 1.45 insn per cycle - 0.814520269 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 904,604,882 cycles:u # 2.294 GHz (74.56%) + 2,038,263 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.90%) + 5,243,681 stalled-cycles-backend:u # 0.58% backend cycles idle (75.68%) + 1,384,022,248 instructions:u # 1.53 insn per cycle + # 0.00 stalled cycles per insn (75.84%) + 0.434083725 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.068773e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.129905e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.129905e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.176250 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.550674e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.617768e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.617768e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.290394 sec INFO: No Floating Point Exceptions have been reported - 15,001,077,825 cycles # 2.896 GHz - 38,374,710,401 instructions # 2.56 insn per cycle - 5.181415080 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 14,781,492,998 cycles:u # 3.436 GHz (74.90%) + 9,777,978 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.90%) + 1,006,773,006 stalled-cycles-backend:u # 6.81% backend cycles idle (74.99%) + 38,652,569,109 instructions:u # 2.61 insn per cycle + # 0.03 stalled cycles per insn (75.08%) + 4.305596735 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515645 -Relative difference = 3.258803994438787e-07 +Avg ME (F77/C++) = 2.0288063388515649 +Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.492980e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.684039e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.684039e+05 ) sec^-1 -MeanMatrixElemValue = 
( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.109528 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.401958e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.619011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.619011e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.562632 sec INFO: No Floating Point Exceptions have been reported - 9,049,547,879 cycles # 2.906 GHz - 24,578,150,431 instructions # 2.72 insn per cycle - 3.114795475 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) + 8,701,354,415 cycles:u # 3.380 GHz (74.84%) + 9,183,222 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.83%) + 765,087,646 stalled-cycles-backend:u # 8.79% backend cycles idle (74.99%) + 24,343,097,716 instructions:u # 2.80 insn per cycle + # 0.03 stalled cycles per insn (75.14%) + 2.577619509 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.554648e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.034559e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.034559e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.996517 sec +OMP threads / `nproc 
--all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.701210e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.295923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.295923e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.542202 sec INFO: No Floating Point Exceptions have been reported - 5,443,502,791 cycles # 2.721 GHz - 11,251,469,346 instructions # 2.07 insn per cycle - 2.001703471 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) + 5,119,297,379 cycles:u # 3.295 GHz (74.82%) + 8,889,841 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.79%) + 18,590,632 stalled-cycles-backend:u # 0.36% backend cycles idle (74.79%) + 11,535,225,652 instructions:u # 2.25 insn per cycle + # 0.00 stalled cycles per insn (74.91%) + 1.557171986 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.119114e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.713742e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.713742e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.821745 sec -INFO: No Floating Point Exceptions have been reported - 4,960,408,882 cycles # 2.716 GHz - 10,558,806,229 instructions # 2.13 insn per cycle - 1.826903839 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.693426e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.898518e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.898518e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.947911 sec -INFO: No Floating Point Exceptions have been reported - 5,367,244,097 cycles # 1.818 GHz - 7,793,958,391 instructions # 1.45 insn per cycle - 2.953294554 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 4b2366d44f..f570d067f3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,231 +1,182 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:00:55 +DATE: 2024-05-16_16:27:11 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.373758e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.924060e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.924060e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.825705 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.980452e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.801512e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.801512e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.304292 sec INFO: No Floating Point Exceptions have been reported - 3,037,157,201 cycles # 2.832 GHz - 4,768,877,833 instructions # 1.57 insn per cycle - 1.128818887 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 3,721,346,618 cycles:u # 2.842 GHz (74.99%) + 38,545,810 stalled-cycles-frontend:u # 1.04% frontend cycles idle (74.96%) + 1,166,119,036 stalled-cycles-backend:u # 31.34% backend cycles idle (74.96%) + 3,843,288,243 instructions:u # 1.03 insn per cycle + # 0.30 stalled cycles per insn (75.19%) + 1.359924619 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.032947e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.092197e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.092197e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.344113 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.478658e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.546075e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.546075e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.504195 sec INFO: No Floating Point Exceptions have been reported - 15,315,317,736 cycles # 2.863 GHz - 38,433,762,310 instructions # 2.51 insn per cycle - 5.351126978 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 14,881,269,953 cycles:u # 3.289 GHz (74.90%) + 9,330,152 stalled-cycles-frontend:u # 0.06% frontend cycles 
idle (74.95%) + 1,034,575,778 stalled-cycles-backend:u # 6.95% backend cycles idle (75.04%) + 38,662,303,687 instructions:u # 2.60 insn per cycle + # 0.03 stalled cycles per insn (75.07%) + 4.527654262 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515645 -Relative difference = 3.258803994438787e-07 +Avg ME (F77/C++) = 2.0288063388515649 +Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.394451e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.578816e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.578816e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.279010 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.219313e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.422253e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.422253e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.756042 sec INFO: No Floating Point Exceptions have been reported - 9,390,215,737 cycles # 2.859 GHz - 24,761,602,813 instructions # 2.64 insn per cycle - 3.285914811 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) + 8,830,553,012 cycles:u # 3.180 GHz (74.97%) + 9,217,198 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.95%) + 784,099,125 stalled-cycles-backend:u # 8.88% backend cycles idle (74.95%) + 24,543,777,422 instructions:u # 2.78 insn per cycle + # 0.03 stalled cycles per insn (74.95%) + 2.780608984 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.346272e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.804430e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.804430e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.151067 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.530911e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.101861e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.101861e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.659856 sec INFO: No Floating Point Exceptions have been reported - 5,795,064,676 cycles # 2.687 GHz - 11,538,955,643 instructions # 1.99 insn per cycle - 2.157987463 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) + 5,313,127,231 cycles:u # 3.161 GHz (74.80%) + 9,346,577 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.76%) + 36,431,573 stalled-cycles-backend:u # 0.69% backend cycles idle (74.81%) + 11,756,634,523 instructions:u # 2.21 insn per cycle + # 0.00 stalled cycles per insn (75.06%) + 1.684064569 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.949125e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.512113e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.512113e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.953091 sec -INFO: No Floating Point Exceptions have been reported - 5,277,608,562 cycles # 2.695 GHz - 10,845,633,589 instructions # 2.06 insn per cycle - 1.960046746 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.545325e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.736253e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.736253e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.149111 sec -INFO: No Floating Point Exceptions have been reported - 5,725,568,726 cycles # 1.815 GHz - 8,037,864,149 instructions # 1.40 insn per cycle - 3.156036160 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 66fdf9efe4..485bcd5541 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:11:59 +DATE: 2024-05-16_16:34:18 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.582142e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.158915e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.274993e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.147117e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.956770e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.010899e+07 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.629579 sec +TOTAL : 1.241847 sec INFO: No Floating 
Point Exceptions have been reported - 2,438,671,292 cycles # 2.828 GHz - 3,557,518,240 instructions # 1.46 insn per cycle - 0.918692112 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 3,207,936,408 cycles:u # 2.928 GHz (75.24%) + 27,242,339 stalled-cycles-frontend:u # 0.85% frontend cycles idle (75.23%) + 1,155,199,922 stalled-cycles-backend:u # 36.01% backend cycles idle (74.84%) + 2,971,859,727 instructions:u # 0.93 insn per cycle + # 0.39 stalled cycles per insn (74.57%) + 1.292980293 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.063642e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.124319e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.124319e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.527489e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.593674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.593674e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.250994 sec +TOTAL : 4.330667 sec INFO: No Floating Point Exceptions have been reported - 15,177,224,624 cycles # 2.888 GHz - 38,389,589,114 instructions # 2.53 insn per cycle - 5.256694767 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 14,767,530,206 cycles:u # 3.400 GHz (74.95%) + 8,678,268 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.96%) + 1,014,325,533 stalled-cycles-backend:u # 6.87% backend cycles idle (74.96%) + 38,729,864,130 instructions:u # 2.62 insn per cycle + # 0.03 stalled cycles per insn (74.95%) + 4.344758665 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515645 -Relative difference = 3.258803994438787e-07 +Avg ME (F77/C++) = 2.0288063388515649 +Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.469246e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.659787e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.659787e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 
4.416414e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.635509e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.635509e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.192043 sec +TOTAL : 2.557400 sec INFO: No Floating Point Exceptions have been reported - 9,234,869,625 cycles # 2.889 GHz - 24,577,322,685 instructions # 2.66 insn per cycle - 3.197667860 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) + 8,688,901,571 cycles:u # 3.382 GHz (74.78%) + 8,925,341 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.89%) + 771,198,437 stalled-cycles-backend:u # 8.88% backend cycles idle (75.05%) + 24,330,529,250 instructions:u # 2.80 insn per cycle + # 0.03 stalled cycles per insn (75.09%) + 2.570508953 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.520662e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.999169e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.999169e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.657527e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.246679e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 8.246679e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.069942 sec +TOTAL : 1.552285 sec INFO: No Floating Point Exceptions have been reported - 5,642,462,557 cycles # 2.720 GHz - 11,233,692,701 instructions # 1.99 insn per cycle - 2.075542898 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) + 5,104,259,143 cycles:u # 3.263 GHz (74.95%) + 8,574,846 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.94%) + 17,602,340 stalled-cycles-backend:u # 0.34% backend cycles idle (74.94%) + 11,520,617,764 instructions:u # 2.26 insn per cycle + # 0.00 stalled cycles per insn (74.96%) + 1.590665734 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.151383e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.740134e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.740134e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.875924 sec -INFO: No Floating Point Exceptions have been reported - 5,122,190,825 cycles # 2.724 GHz - 10,508,387,782 instructions # 2.05 insn per cycle - 1.881606947 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.617306e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.815381e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.815381e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.070946 sec -INFO: No Floating Point Exceptions have been reported - 5,582,158,144 cycles # 1.816 GHz - 7,742,870,902 instructions # 1.39 insn per cycle - 3.076599052 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 3db0a99453..184133b4b5 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,220 +1,176 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:06:28 +DATE: 2024-05-16_16:32:07 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.591450e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156507e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.275190e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.728834 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.838206e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.944221e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.997743e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.178844 sec INFO: No Floating Point Exceptions have been reported - 2,711,621,820 cycles # 2.826 GHz - 4,288,575,941 instructions # 1.58 insn per cycle - 1.017933550 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 3,636,585,685 cycles:u # 3.017 GHz (74.97%) + 39,001,917 stalled-cycles-frontend:u # 1.07% frontend cycles idle (75.09%) + 1,153,410,687 stalled-cycles-backend:u # 31.72% backend cycles idle (74.79%) + 3,894,627,156 instructions:u # 1.07 insn per cycle + # 0.30 stalled cycles per insn (74.83%) + 1.230751921 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.045774e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.105988e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.105988e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.234926 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.527013e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.595795e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.595795e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.343540 sec INFO: No Floating Point Exceptions have been reported - 14,996,539,700 cycles # 2.862 GHz - 38,373,492,139 instructions # 2.56 insn per cycle - 5.240540958 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 14,768,806,534 cycles:u # 3.391 GHz (74.96%) + 9,491,572 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.02%) + 1,010,953,915 stalled-cycles-backend:u # 6.85% backend cycles idle (75.02%) + 38,646,076,600 instructions:u # 2.62 insn per cycle + # 0.03 stalled cycles 
per insn (75.02%) + 4.360153127 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515645 -Relative difference = 3.258803994438787e-07 +Avg ME (F77/C++) = 2.0288063388515649 +Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.431303e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.617753e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.617753e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.165641 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.256539e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.461112e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.461112e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.644582 sec INFO: No Floating Point Exceptions have been reported - 9,072,261,960 cycles # 2.862 GHz - 24,578,342,604 instructions # 2.71 insn per cycle - 3.171145800 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) + 8,627,687,881 cycles:u # 3.250 GHz (75.00%) + 8,854,719 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.99%) + 774,635,987 stalled-cycles-backend:u # 8.98% backend cycles idle (74.99%) + 24,443,738,037 instructions:u # 2.83 insn per cycle + # 0.03 stalled cycles per insn (74.99%) + 2.656284712 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.460196e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.936686e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.936686e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.030403 sec +OMP 
threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.520387e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.094489e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.094489e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.575969 sec INFO: No Floating Point Exceptions have been reported - 5,452,336,471 cycles # 2.679 GHz - 11,251,160,510 instructions # 2.06 insn per cycle - 2.035938093 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) + 5,140,085,504 cycles:u # 3.239 GHz (74.81%) + 11,584,804 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.80%) + 19,445,142 stalled-cycles-backend:u # 0.38% backend cycles idle (74.80%) + 11,526,520,975 instructions:u # 2.24 insn per cycle + # 0.00 stalled cycles per insn (75.05%) + 1.588695948 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.063893e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.649981e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.649981e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.838982 sec -INFO: No Floating Point Exceptions have been reported - 4,938,631,038 cycles # 2.680 GHz - 10,556,930,414 instructions # 2.14 insn per cycle - 1.844618889 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.589787e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.785615e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.785615e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.030446 sec -INFO: No Floating Point Exceptions have been reported - 5,385,276,295 cycles # 1.774 GHz - 7,793,583,016 instructions # 1.45 insn per cycle - 3.036161028 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 0caf1293cf..94cda4d72c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:35:28 +DATE: 2024-05-16_15:56:34 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.206695e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.183658e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279171e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.521961 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.844801e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.918896e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.971923e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) 
GeV^0 +TOTAL : 0.392057 sec INFO: No Floating Point Exceptions have been reported - 2,148,802,757 cycles # 2.845 GHz - 3,054,152,486 instructions # 1.42 insn per cycle - 0.812117976 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 870,834,906 cycles:u # 2.227 GHz (74.83%) + 2,194,838 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.48%) + 5,125,670 stalled-cycles-backend:u # 0.59% backend cycles idle (75.64%) + 1,436,621,970 instructions:u # 1.65 insn per cycle + # 0.00 stalled cycles per insn (75.78%) + 0.442672083 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.068168e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.129039e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.129039e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.177399 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.452054e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.513152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.513152e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.457161 sec INFO: No Floating Point Exceptions have been reported - 15,011,872,798 cycles # 2.897 GHz - 40,100,761,049 instructions # 2.67 insn per cycle - 5.182501125 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) + 15,328,825,337 cycles:u # 3.430 GHz (74.95%) + 9,136,960 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.95%) + 203,789,958 stalled-cycles-backend:u # 1.33% backend cycles idle (74.95%) + 39,537,916,360 instructions:u # 2.58 insn per cycle + # 0.01 stalled cycles per insn (74.98%) + 4.472712291 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 596) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.634343e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.844834e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.844834e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.993727 sec +OMP threads / `nproc 
--all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.418545e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.641773e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.641773e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.552962 sec INFO: No Floating Point Exceptions have been reported - 8,671,029,072 cycles # 2.892 GHz - 23,670,969,931 instructions # 2.73 insn per cycle - 2.999072752 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2072) (avx2: 0) (512y: 0) (512z: 0) + 8,651,232,050 cycles:u # 3.373 GHz (74.99%) + 9,885,837 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.05%) + 1,229,260,585 stalled-cycles-backend:u # 14.21% backend cycles idle (75.05%) + 23,522,160,140 instructions:u # 2.72 insn per cycle + # 0.05 stalled cycles per insn (75.05%) + 2.567902454 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1948) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.945254e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.323667e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.323667e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.228700 sec +OMP threads / `nproc 
--all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.876715e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.350584e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.350584e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.708099 sec INFO: No Floating Point Exceptions have been reported - 6,081,438,462 cycles # 2.724 GHz - 13,061,002,322 instructions # 2.15 insn per cycle - 2.233958089 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) + 5,677,260,445 cycles:u # 3.298 GHz (74.98%) + 13,332,118 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.91%) + 770,564,901 stalled-cycles-backend:u # 13.57% backend cycles idle (74.69%) + 13,180,087,374 instructions:u # 2.32 insn per cycle + # 0.06 stalled cycles per insn (74.69%) + 1.724279096 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2560) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.205594e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.622405e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.622405e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.121856 sec -INFO: No Floating Point Exceptions have been reported - 5,798,891,312 cycles # 2.727 GHz - 12,319,969,769 instructions # 2.12 insn per cycle - 2.127030294 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2093) (512y: 294) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.380432e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.550251e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.550251e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.209519 sec -INFO: No Floating Point Exceptions have been reported - 5,821,355,640 cycles # 1.812 GHz - 9,603,981,726 instructions # 1.65 insn per cycle - 3.214724733 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1971) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 6af05ea7e1..7cea2da9aa 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:51:55 +DATE: 2024-05-16_16:15:08 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.681198e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166116e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.276872e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.529611 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.090215e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.962110e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.016244e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) 
GeV^0 +TOTAL : 0.402423 sec INFO: No Floating Point Exceptions have been reported - 2,190,477,637 cycles # 2.832 GHz - 3,135,955,530 instructions # 1.43 insn per cycle - 0.830299558 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 857,611,074 cycles:u # 2.179 GHz (75.51%) + 2,136,985 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.78%) + 5,438,902 stalled-cycles-backend:u # 0.63% backend cycles idle (74.78%) + 1,371,416,540 instructions:u # 1.60 insn per cycle + # 0.00 stalled cycles per insn (75.96%) + 0.454987208 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.383572e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.466296e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.466296e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.507918 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.815405e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.902732e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.902732e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.910887 sec INFO: No Floating Point Exceptions have been reported - 13,013,442,526 cycles # 2.884 GHz - 34,387,029,075 instructions # 2.64 insn per cycle - 4.513459426 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) + 13,171,113,764 cycles:u # 3.358 GHz (74.93%) + 9,313,023 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.92%) + 1,409,492,709 stalled-cycles-backend:u # 10.70% backend cycles idle (74.92%) + 35,891,557,595 instructions:u # 2.73 insn per cycle + # 0.04 stalled cycles per insn (74.95%) + 3.932336347 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1078) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.946707e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.083881e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.083881e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.667816 sec +OMP threads / `nproc 
--all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.326543e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.537107e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.537107e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.604640 sec INFO: No Floating Point Exceptions have been reported - 10,591,846,077 cycles # 2.884 GHz - 24,007,245,790 instructions # 2.27 insn per cycle - 3.673406920 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) + 8,622,137,899 cycles:u # 3.296 GHz (74.95%) + 9,155,093 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) + 2,422,668,392 stalled-cycles-backend:u # 28.10% backend cycles idle (74.93%) + 21,931,398,005 instructions:u # 2.54 insn per cycle + # 0.11 stalled cycles per insn (74.94%) + 2.619209121 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2334) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.532632e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.849376e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.849376e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.423240 sec -INFO: No Floating Point 
Exceptions have been reported - 6,577,855,979 cycles # 2.709 GHz - 12,401,365,684 instructions # 1.89 insn per cycle - 2.428791768 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3154) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516200 -Relative difference = 3.2588037208240405e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.754457e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.104775e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.104775e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.314834 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.667518e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.109828e+05 
) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.109828e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.753724 sec INFO: No Floating Point Exceptions have been reported - 6,233,998,487 cycles # 2.688 GHz - 11,576,068,199 instructions # 1.86 insn per cycle - 2.320534715 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 239) (512z: 0) + 5,807,189,734 cycles:u # 3.290 GHz (75.10%) + 8,245,915 stalled-cycles-frontend:u # 0.14% frontend cycles idle (75.07%) + 1,884,690,751 stalled-cycles-backend:u # 32.45% backend cycles idle (75.07%) + 11,992,379,155 instructions:u # 2.07 insn per cycle + # 0.16 stalled cycles per insn (75.07%) + 1.767919104 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3062) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516200 -Relative difference = 3.2588037208240405e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.687851e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.893233e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.893233e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.952132 sec -INFO: No Floating Point Exceptions have been reported - 5,323,772,693 cycles # 1.802 GHz - 9,296,912,008 instructions # 1.75 insn per cycle - 2.957828928 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2115) (512y: 282) (512z: 1958) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 2040ec21eb..638eaa15d2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' 
HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:52:19 +DATE: 2024-05-16_16:15:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.680230e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168644e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.280417e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.532171 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.856498e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.922856e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.976149e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) 
GeV^0 +TOTAL : 0.373820 sec INFO: No Floating Point Exceptions have been reported - 2,169,507,018 cycles # 2.828 GHz - 3,115,355,964 instructions # 1.44 insn per cycle - 0.826043020 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 899,977,017 cycles:u # 2.255 GHz (75.57%) + 2,145,266 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.52%) + 5,316,146 stalled-cycles-backend:u # 0.59% backend cycles idle (74.03%) + 1,515,438,363 instructions:u # 1.68 insn per cycle + # 0.00 stalled cycles per insn (73.84%) + 0.425285387 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.524819e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.617052e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.617052e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.262483 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.238329e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.346767e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.346767e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.423715 sec INFO: No Floating Point Exceptions have been reported - 12,358,560,610 cycles # 2.896 GHz - 35,037,446,637 instructions # 2.84 insn per cycle - 4.268207887 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) + 11,725,161,803 cycles:u # 3.413 GHz (74.80%) + 8,705,696 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.92%) + 64,791,846 stalled-cycles-backend:u # 0.55% backend cycles idle (75.04%) + 35,634,475,574 instructions:u # 3.04 insn per cycle + # 0.00 stalled cycles per insn (75.09%) + 3.438838371 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 469) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.908483e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.040450e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.040450e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.714757 sec +OMP threads / `nproc 
--all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.824583e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.088173e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.088173e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.353161 sec INFO: No Floating Point Exceptions have been reported - 10,745,562,014 cycles # 2.889 GHz - 23,084,374,218 instructions # 2.15 insn per cycle - 3.720383315 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) + 7,944,878,439 cycles:u # 3.360 GHz (74.97%) + 9,601,207 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.97%) + 1,446,990,839 stalled-cycles-backend:u # 18.21% backend cycles idle (74.97%) + 21,237,436,715 instructions:u # 2.67 insn per cycle + # 0.07 stalled cycles per insn (74.98%) + 2.371216486 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2088) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.878271e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.246530e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.246530e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.258864 sec +OMP threads / `nproc 
--all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.972827e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.617255e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.617255e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.497448 sec INFO: No Floating Point Exceptions have been reported - 6,151,591,588 cycles # 2.717 GHz - 11,956,808,073 instructions # 1.94 insn per cycle - 2.264473200 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2509) (512y: 0) (512z: 0) + 4,959,076,198 cycles:u # 3.286 GHz (74.77%) + 8,817,217 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.09%) + 186,742,422 stalled-cycles-backend:u # 3.77% backend cycles idle (75.09%) + 11,331,742,009 instructions:u # 2.29 insn per cycle + # 0.02 stalled cycles per insn (75.09%) + 1.514537087 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2370) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.958079e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.345089e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.345089e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.224234 sec -INFO: No Floating Point Exceptions have been reported - 6,017,653,055 cycles # 2.700 GHz - 11,128,128,624 instructions # 1.85 insn per cycle - 2.229785356 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2126) (512y: 174) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.739650e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.951827e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.951827e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.913360 sec -INFO: No Floating Point Exceptions have been reported - 5,212,798,448 cycles # 1.786 GHz - 9,020,884,070 instructions # 1.73 insn per cycle - 2.919040069 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1650) (512y: 208) (512z: 1567) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 93f412dad4..b6a754e57c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:35:52 +DATE: 2024-05-16_15:56:48 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.088595e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.705968e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.969781e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.482195 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.535931e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.912917e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.071279e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) 
GeV^0 +TOTAL : 0.327484 sec INFO: No Floating Point Exceptions have been reported - 2,007,920,858 cycles # 2.849 GHz - 2,840,933,430 instructions # 1.41 insn per cycle - 0.763422225 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 773,236,796 cycles:u # 2.264 GHz (74.45%) + 2,124,472 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.63%) + 4,788,078 stalled-cycles-backend:u # 0.62% backend cycles idle (74.65%) + 1,358,147,244 instructions:u # 1.76 insn per cycle + # 0.00 stalled cycles per insn (75.30%) + 0.383958616 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173652952537 +Relative difference = 1.1658506339321586e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.200574e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.271569e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.271569e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.850875 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.997580e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.087776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.087776e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.645698 sec INFO: No Floating Point Exceptions have been reported - 14,073,569,281 cycles # 2.899 GHz - 38,343,239,881 instructions # 2.72 insn per cycle - 4.855897587 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 12,589,223,314 cycles:u # 3.445 GHz (74.87%) + 6,876,391 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.05%) + 1,258,354,294 stalled-cycles-backend:u # 10.00% backend cycles idle (75.05%) + 37,054,233,567 instructions:u # 2.94 insn per cycle + # 0.03 stalled cycles per insn (75.05%) + 3.657725980 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199022179469 -Relative difference = 4.819651478256564e-08 +Avg ME (F77/C++) = 2.0288198367925361 +Relative difference = 8.044452636897417e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.925449e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.332953e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.332953e+05 ) sec^-1 -MeanMatrixElemValue = 
( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.217076 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 5.721418e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.095346e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.095346e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.986765 sec INFO: No Floating Point Exceptions have been reported - 6,436,588,824 cycles # 2.899 GHz - 15,815,821,412 instructions # 2.46 insn per cycle - 2.222049918 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) + 6,105,516,697 cycles:u # 3.060 GHz (74.78%) + 7,043,845 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.97%) + 2,117,701,849 stalled-cycles-backend:u # 34.69% backend cycles idle (75.14%) + 15,144,529,164 instructions:u # 2.48 insn per cycle + # 0.14 stalled cycles per insn (75.14%) + 1.998669943 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193548331037 -Relative difference = 1.748963824709674e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198773050681 +Relative difference = 6.047600673895608e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.963004e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.029520e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.029520e+06 
) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.256665 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.075653e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.212925e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.212925e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.139711 sec INFO: No Floating Point Exceptions have been reported - 3,455,760,948 cycles # 2.740 GHz - 7,593,976,565 instructions # 2.20 insn per cycle - 1.261861875 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) + 3,319,451,666 cycles:u # 2.889 GHz (75.10%) + 7,425,440 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.96%) + 1,104,222,098 stalled-cycles-backend:u # 33.27% backend cycles idle (74.94%) + 7,606,072,308 instructions:u # 2.29 insn per cycle + # 0.15 stalled cycles per insn (74.93%) + 1.152514939 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181684445590 -Relative difference = 8.302595855806234e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186294492334 +Relative difference = 1.826435805832187e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.569986e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.110539e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.110539e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.182427 sec -INFO: No Floating Point Exceptions have been reported - 3,244,770,474 cycles # 2.734 GHz - 7,203,559,407 instructions # 2.22 insn per cycle - 1.187623854 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181684445590 -Relative difference = 8.302595855806234e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.864494e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.605662e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.605662e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.614546 sec -INFO: No Floating Point Exceptions have been reported - 3,050,749,421 cycles # 1.885 GHz - 5,835,755,685 instructions # 1.91 insn 
per cycle - 1.619564037 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183349184692 -Relative difference = 1.6508058850146622e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 426db838d7..139359cba7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,231 +1,182 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:01:19 +DATE: 2024-05-16_16:27:27 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.801236e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.462846e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.462846e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.684862 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.423820e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.053969e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.053969e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 +TOTAL : 1.153350 sec INFO: No Floating Point Exceptions have been reported - 2,586,573,508 cycles # 2.828 GHz - 4,016,406,941 instructions # 1.55 insn per cycle - 0.971565490 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 3,570,722,038 cycles:u # 3.020 GHz (75.03%) + 21,900,913 stalled-cycles-frontend:u # 0.61% frontend cycles idle (75.10%) + 1,143,439,983 stalled-cycles-backend:u # 32.02% backend cycles idle (75.08%) + 3,893,708,783 instructions:u # 1.09 insn per cycle + # 0.29 stalled cycles per insn (74.61%) + 1.205835268 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173652952537 +Relative difference = 1.1658506339321586e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.176436e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.247449e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.247449e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.948763 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.954737e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.042572e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.042572e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.733062 sec INFO: No Floating Point Exceptions have been reported - 14,176,104,430 cycles # 2.862 GHz - 38,383,843,895 instructions # 2.71 insn per cycle - 4.955194603 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 12,647,578,901 cycles:u # 3.377 GHz (74.99%) + 7,387,293 stalled-cycles-frontend:u # 0.06% frontend cycles idle 
(75.01%) + 1,284,454,821 stalled-cycles-backend:u # 10.16% backend cycles idle (75.01%) + 37,049,023,071 instructions:u # 2.93 insn per cycle + # 0.03 stalled cycles per insn (75.01%) + 3.748399047 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199022179469 -Relative difference = 4.819651478256564e-08 +Avg ME (F77/C++) = 2.0288198367925361 +Relative difference = 8.044452636897417e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.809798e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.200764e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.200764e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.315849 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.263371e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.701339e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.701339e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.870100 sec INFO: No Floating Point Exceptions have been reported - 6,633,418,276 cycles # 2.858 GHz - 16,095,968,093 instructions # 2.43 insn per cycle - 2.322298973 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) + 6,176,943,234 cycles:u # 3.281 GHz (74.96%) + 6,834,359 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) + 2,157,535,934 stalled-cycles-backend:u # 34.93% backend cycles idle (74.93%) + 15,462,240,191 instructions:u # 2.50 insn per cycle + # 0.14 stalled cycles per insn (74.95%) + 1.886098189 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193548331037 -Relative difference = 1.748963824709674e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198773050681 +Relative difference = 6.047600673895608e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.679036e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.925640e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.925640e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.342355 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.207397e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.360799e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.360799e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.057297 sec INFO: No Floating Point Exceptions have been reported - 3,640,592,514 cycles # 2.701 GHz - 7,831,268,120 instructions # 2.15 insn per cycle - 1.348786146 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) + 3,423,379,937 cycles:u # 3.201 GHz (74.59%) + 7,355,299 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.79%) + 1,119,764,046 stalled-cycles-backend:u # 32.71% backend cycles idle (75.16%) + 7,862,426,800 instructions:u # 2.30 insn per cycle + # 0.14 stalled cycles per insn (75.32%) + 1.072734118 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181684445590 -Relative difference = 8.302595855806234e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186294492334 +Relative difference = 1.826435805832187e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.163700e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.056629e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.056629e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.278871 sec -INFO: No Floating Point Exceptions have been reported - 3,437,646,895 cycles # 2.676 GHz - 7,439,842,858 instructions # 2.16 insn per cycle - 1.285386542 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181684445590 -Relative difference = 8.302595855806234e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.597215e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.292791e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.292791e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.727252 sec -INFO: No Floating Point Exceptions have been reported - 3,258,697,081 cycles # 1.881 GHz - 6,089,840,836 instructions # 1.87 insn per cycle - 1.733818978 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183349184692 -Relative difference = 1.6508058850146622e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 884891874e..c7f370ba8f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:12:23 +DATE: 2024-05-16_16:34:32 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.468958e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.648278e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.971571e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.575797 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.297408e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.914963e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.074175e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.080169e+00 +- 
3.463853e-03 ) GeV^0 +TOTAL : 1.034607 sec INFO: No Floating Point Exceptions have been reported - 2,271,357,910 cycles # 2.845 GHz - 3,342,640,625 instructions # 1.47 insn per cycle - 0.855647595 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 3,110,732,271 cycles:u # 3.009 GHz (74.81%) + 10,895,003 stalled-cycles-frontend:u # 0.35% frontend cycles idle (74.84%) + 1,144,475,117 stalled-cycles-backend:u # 36.79% backend cycles idle (75.28%) + 2,805,564,530 instructions:u # 0.90 insn per cycle + # 0.41 stalled cycles per insn (75.34%) + 1.080274590 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173652952537 +Relative difference = 1.1658506339321586e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.198151e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.269622e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.269622e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.968527e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.063388e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.063388e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.914840 sec +TOTAL : 3.683255 sec INFO: No Floating Point Exceptions have been reported - 14,211,276,974 cycles # 2.889 GHz - 38,370,210,397 instructions # 2.70 insn per cycle - 4.920108721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 12,618,238,423 cycles:u # 3.419 GHz (74.86%) + 6,998,002 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.92%) + 1,275,040,191 stalled-cycles-backend:u # 10.10% backend cycles idle (75.03%) + 36,998,101,411 instructions:u # 2.93 insn per cycle + # 0.03 stalled cycles per insn (75.08%) + 3.694249748 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199022179469 -Relative difference = 4.819651478256564e-08 +Avg ME (F77/C++) = 2.0288198367925361 +Relative difference = 8.044452636897417e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.892733e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.301573e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.301573e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 
2.288479 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.353191e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.790048e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.790048e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.800944 sec INFO: No Floating Point Exceptions have been reported - 6,608,042,838 cycles # 2.882 GHz - 15,829,158,403 instructions # 2.40 insn per cycle - 2.293691008 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) + 6,110,101,207 cycles:u # 3.377 GHz (74.80%) + 6,856,178 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.81%) + 2,138,374,305 stalled-cycles-backend:u # 35.00% backend cycles idle (74.84%) + 15,165,134,831 instructions:u # 2.48 insn per cycle + # 0.14 stalled cycles per insn (75.05%) + 1.810926152 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193548331037 -Relative difference = 1.748963824709674e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198773050681 +Relative difference = 6.047600673895608e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.919042e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.023820e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.023820e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 
+- 3.404207e-03 ) GeV^0 -TOTAL : 1.319201 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.221950e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.378779e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.378779e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.006094 sec INFO: No Floating Point Exceptions have been reported - 3,618,631,378 cycles # 2.734 GHz - 7,578,247,859 instructions # 2.09 insn per cycle - 1.324366743 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) + 3,309,075,319 cycles:u # 3.263 GHz (74.76%) + 7,259,054 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.76%) + 1,112,117,116 stalled-cycles-backend:u # 33.61% backend cycles idle (74.78%) + 7,718,708,171 instructions:u # 2.33 insn per cycle + # 0.14 stalled cycles per insn (74.82%) + 1.015424058 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181684445590 -Relative difference = 8.302595855806234e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186294492334 +Relative difference = 1.826435805832187e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.492699e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.100151e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.100151e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.250231 sec -INFO: No Floating Point Exceptions have been reported - 3,418,366,623 cycles # 2.724 GHz - 7,152,275,486 instructions # 2.09 insn per cycle - 1.255758340 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181684445590 -Relative difference = 8.302595855806234e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.830732e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.562097e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.562097e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.681529 sec -INFO: No Floating Point Exceptions have been reported - 3,218,452,038 cycles # 1.909 GHz - 5,786,270,960 
instructions # 1.80 insn per cycle - 1.686847993 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183349184692 -Relative difference = 1.6508058850146622e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 7e3b1fa48e..1024b8038f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,220 +1,176 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 
1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:06:52 +DATE: 2024-05-16_16:32:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.502594e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.623050e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.943883e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.628666 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.639842e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.882829e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.039851e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 +TOTAL : 1.215821 sec INFO: No Floating Point Exceptions have been reported - 2,403,264,425 cycles # 2.820 GHz - 3,734,811,294 instructions # 1.55 insn per cycle - 0.909767197 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 3,530,861,000 cycles:u # 2.896 GHz (74.93%) + 22,024,313 stalled-cycles-frontend:u # 0.62% frontend cycles idle (75.13%) + 1,135,065,941 stalled-cycles-backend:u # 32.15% backend cycles idle (75.13%) + 3,804,129,287 instructions:u # 1.08 insn per cycle + # 0.30 stalled cycles per insn (75.02%) + 1.266337670 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173652952537 +Relative difference = 1.1658506339321586e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.185686e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.257300e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.257300e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.884819 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.943816e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.034642e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.034642e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.713292 sec INFO: No Floating Point Exceptions have been reported - 13,995,449,913 cycles # 2.863 GHz - 38,340,978,131 instructions # 2.74 insn per cycle - 4.889991891 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) + 12,596,850,375 cycles:u # 3.386 GHz (74.87%) + 7,002,203 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.97%) + 1,268,502,926 
stalled-cycles-backend:u # 10.07% backend cycles idle (75.06%) + 37,037,500,760 instructions:u # 2.94 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 3.722349634 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199022179469 -Relative difference = 4.819651478256564e-08 +Avg ME (F77/C++) = 2.0288198367925361 +Relative difference = 8.044452636897417e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.864053e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.263128e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.263128e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.244540 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.145526e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.573467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.573467e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.852312 sec INFO: No Floating Point Exceptions have been reported - 6,436,419,349 cycles # 2.862 GHz - 15,815,556,279 instructions # 2.46 insn per cycle - 2.249779623 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) + 6,095,909,330 cycles:u # 3.278 GHz (74.98%) + 6,762,916 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.05%) + 2,125,898,597 stalled-cycles-backend:u # 34.87% backend cycles idle (75.05%) + 15,162,942,401 instructions:u # 2.49 insn per cycle + # 0.14 stalled cycles per insn (75.05%) + 1.861268659 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ 
PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193548331037 -Relative difference = 1.748963824709674e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198773050681 +Relative difference = 6.047600673895608e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.799961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.008748e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.008748e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.279634 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.198895e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.353510e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.353510e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.024526 sec INFO: No Floating Point Exceptions have been reported - 3,447,592,643 cycles # 2.685 GHz - 7,593,708,789 instructions # 2.20 insn per cycle - 1.284877623 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) + 3,351,625,848 cycles:u # 3.247 GHz (74.47%) + 6,982,467 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.79%) + 1,103,052,859 stalled-cycles-backend:u # 32.91% backend cycles idle (75.17%) + 7,621,520,415 instructions:u # 2.27 insn per cycle + # 0.14 stalled cycles per insn (75.20%) + 1.033919829 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181684445590 -Relative difference = 8.302595855806234e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186294492334 +Relative difference = 1.826435805832187e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.434984e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092289e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092289e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.198937 sec -INFO: No Floating Point Exceptions have been reported - 3,242,375,801 cycles # 2.694 GHz - 7,202,509,960 instructions # 2.22 insn per cycle - 1.204245270 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181684445590 -Relative difference = 8.302595855806234e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.713311e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.432943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.432943e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.650810 sec -INFO: No Floating Point Exceptions have been reported - 3,050,285,995 cycles # 1.842 GHz - 5,834,789,164 
instructions # 1.91 insn per cycle - 1.656446986 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183349184692 -Relative difference = 1.6508058850146622e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 3e123e6fd7..5d2c7c47a2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:36:11 +DATE: 2024-05-16_15:57:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.096553e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.763289e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.037690e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.480283 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.721482e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.085896e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.266185e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) 
GeV^0 +TOTAL : 0.477038 sec INFO: No Floating Point Exceptions have been reported - 2,036,711,218 cycles # 2.852 GHz - 2,918,453,967 instructions # 1.43 insn per cycle - 0.771336406 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 751,230,740 cycles:u # 1.945 GHz (74.81%) + 2,180,377 stalled-cycles-frontend:u # 0.29% frontend cycles idle (73.52%) + 6,473,295 stalled-cycles-backend:u # 0.86% backend cycles idle (74.74%) + 1,376,631,056 instructions:u # 1.83 insn per cycle + # 0.00 stalled cycles per insn (75.39%) + 0.532053272 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173652952537 +Relative difference = 1.1658506339321586e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.166079e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.236793e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.236793e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.926463 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.624046e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.702467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.702467e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 4.160753 sec INFO: No Floating Point Exceptions have been reported - 14,320,299,267 cycles # 2.905 GHz - 39,836,243,439 instructions # 2.78 insn per cycle - 4.931482509 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) + 12,670,532,130 cycles:u # 3.039 GHz (74.90%) + 7,285,144 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.92%) + 9,227,413 stalled-cycles-backend:u # 0.07% backend cycles idle (75.00%) + 37,386,257,114 instructions:u # 2.95 insn per cycle + # 0.00 stalled cycles per insn (75.06%) + 4.173070232 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 503) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199028000236 -Relative difference = 4.790961076489297e-08 +Avg ME (F77/C++) = 2.0288198367925361 +Relative difference = 8.044452636897417e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.723514e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.285593e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.285593e+05 ) sec^-1 -MeanMatrixElemValue = 
( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.919156 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.415156e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.929604e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.929604e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.799517 sec INFO: No Floating Point Exceptions have been reported - 5,582,245,803 cycles # 2.902 GHz - 15,285,424,302 instructions # 2.74 insn per cycle - 1.924109376 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2474) (avx2: 0) (512y: 0) (512z: 0) + 5,366,705,913 cycles:u # 2.967 GHz (74.82%) + 6,883,107 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.81%) + 1,361,757,615 stalled-cycles-backend:u # 25.37% backend cycles idle (74.81%) + 15,224,904,680 instructions:u # 2.84 insn per cycle + # 0.09 stalled cycles per insn (74.98%) + 1.811719603 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2330) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193548331037 -Relative difference = 1.748963824709674e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198773050681 +Relative difference = 6.047600673895608e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.349024e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.991002e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.991002e+05 
) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.737963 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.808294e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.509321e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.509321e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.507388 sec INFO: No Floating Point Exceptions have been reported - 4,749,494,972 cycles # 2.726 GHz - 9,735,095,064 instructions # 2.05 insn per cycle - 1.742978161 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3708) (512y: 0) (512z: 0) + 4,478,110,129 cycles:u # 2.951 GHz (74.62%) + 6,841,835 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.78%) + 1,661,162,208 stalled-cycles-backend:u # 37.10% backend cycles idle (75.02%) + 9,764,490,974 instructions:u # 2.18 insn per cycle + # 0.17 stalled cycles per insn (75.22%) + 1.560863897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3750) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182108197361 -Relative difference = 1.0391259163456515e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186428369954 +Relative difference = 1.7604478492421832e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.536931e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.219273e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.219273e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.690263 sec -INFO: No Floating Point Exceptions have been reported - 4,623,322,631 cycles # 2.728 GHz - 9,325,575,279 instructions # 2.02 insn per cycle - 1.695318457 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 3496) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182108197361 -Relative difference = 1.0391259163456515e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.572579e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.052133e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.052133e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.968034 sec -INFO: No Floating Point Exceptions have been reported - 3,660,831,684 cycles # 1.856 GHz - 7,034,974,988 instructions # 1.92 insn 
per cycle - 1.973212700 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2610) (512y: 12) (512z: 2220) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183459779248 -Relative difference = 1.7053177021099307e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index c7eded0fc2..3b4ac1f2ba 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:52:42 +DATE: 2024-05-16_16:15:34 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.456356e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.657836e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.983561e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.487671 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.262621e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.917369e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.077168e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) 
GeV^0 +TOTAL : 0.317044 sec INFO: No Floating Point Exceptions have been reported - 2,030,099,363 cycles # 2.844 GHz - 2,856,891,631 instructions # 1.41 insn per cycle - 0.771313393 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 786,752,584 cycles:u # 2.316 GHz (73.25%) + 2,044,002 stalled-cycles-frontend:u # 0.26% frontend cycles idle (73.57%) + 5,201,629 stalled-cycles-backend:u # 0.66% backend cycles idle (75.92%) + 1,271,782,822 instructions:u # 1.62 insn per cycle + # 0.00 stalled cycles per insn (76.48%) + 0.366405996 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173652952537 +Relative difference = 1.1658506339321586e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.397227e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.481743e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.481743e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.460576 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.193781e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.296393e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.296393e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.430914 sec INFO: No Floating Point Exceptions have been reported - 12,588,647,411 cycles # 2.819 GHz - 34,372,288,545 instructions # 2.73 insn per cycle - 4.465853868 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) + 11,744,320,248 cycles:u # 3.415 GHz (74.87%) + 6,783,164 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.88%) + 10,253,426 stalled-cycles-backend:u # 0.09% backend cycles idle (74.97%) + 34,149,232,337 instructions:u # 2.91 insn per cycle + # 0.00 stalled cycles per insn (75.07%) + 3.442357286 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 768) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199094356969 -Relative difference = 4.463890496342449e-08 +Avg ME (F77/C++) = 2.0288199088536203 +Relative difference = 4.4925808981097166e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.225217e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.687950e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.687950e+05 ) sec^-1 -MeanMatrixElemValue = 
( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.095644 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.330381e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.912028e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.912028e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.578213 sec INFO: No Floating Point Exceptions have been reported - 6,085,238,066 cycles # 2.897 GHz - 14,860,574,019 instructions # 2.44 insn per cycle - 2.101017455 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) + 5,330,914,740 cycles:u # 3.361 GHz (74.76%) + 6,901,621 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.80%) + 2,180,786,368 stalled-cycles-backend:u # 40.91% backend cycles idle (74.92%) + 14,553,683,782 instructions:u # 2.73 insn per cycle + # 0.15 stalled cycles per insn (75.14%) + 1.589188713 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2947) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193803280592 -Relative difference = 1.8746278463897685e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198769558221 +Relative difference = 6.06481491495597e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.969640e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.750011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.750011e+05 
) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.592133 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.171501e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.002861e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002861e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.290076 sec INFO: No Floating Point Exceptions have been reported - 4,316,607,801 cycles # 2.703 GHz - 9,028,975,402 instructions # 2.09 insn per cycle - 1.597664902 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4443) (512y: 0) (512z: 0) + 4,203,310,590 cycles:u # 3.237 GHz (74.86%) + 7,035,707 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.75%) + 1,650,350,604 stalled-cycles-backend:u # 39.26% backend cycles idle (74.76%) + 9,089,278,343 instructions:u # 2.16 insn per cycle + # 0.18 stalled cycles per insn (74.79%) + 1.301799243 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4501) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181999931112 -Relative difference = 9.857617164523888e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186752004549 +Relative difference = 1.6009291367898262e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.187100e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.023996e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.023996e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.545422 sec -INFO: No Floating Point Exceptions have been reported - 4,204,195,380 cycles # 2.712 GHz - 8,663,569,400 instructions # 2.06 insn per cycle - 1.550927334 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 4243) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181999931112 -Relative difference = 9.857617164523888e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.251438e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.680453e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.680453e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.083936 sec -INFO: No Floating Point Exceptions have been reported - 3,833,998,104 cycles # 1.836 GHz - 7,808,361,622 instructions # 2.04 insn 
per cycle - 2.089489123 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4424) (512y: 0) (512z: 2555) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183246739209 -Relative difference = 1.6003107281264138e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index aad34f68a4..cb765f6595 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:53:02 +DATE: 2024-05-16_16:15:46 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.520611e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.721194e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.056652e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.485221 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.739392e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.097312e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.278679e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) 
GeV^0 +TOTAL : 0.347002 sec INFO: No Floating Point Exceptions have been reported - 2,023,639,378 cycles # 2.841 GHz - 2,891,046,466 instructions # 1.43 insn per cycle - 0.769493206 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 759,139,838 cycles:u # 2.271 GHz (72.72%) + 2,093,326 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.90%) + 5,604,886 stalled-cycles-backend:u # 0.74% backend cycles idle (76.10%) + 1,256,158,675 instructions:u # 1.65 insn per cycle + # 0.00 stalled cycles per insn (76.19%) + 0.393998470 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173652952537 +Relative difference = 1.1658506339321586e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.614708e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.719370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.719370e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.097789 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.464648e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.585788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.585788e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.172917 sec INFO: No Floating Point Exceptions have been reported - 11,755,034,517 cycles # 2.866 GHz - 35,108,588,793 instructions # 2.99 insn per cycle - 4.103114971 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) + 10,837,277,551 cycles:u # 3.407 GHz (74.86%) + 7,096,693 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.93%) + 12,679,835 stalled-cycles-backend:u # 0.12% backend cycles idle (75.05%) + 35,363,647,748 instructions:u # 3.26 insn per cycle + # 0.00 stalled cycles per insn (75.11%) + 3.184330509 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 469) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199094356969 -Relative difference = 4.463890496342449e-08 +Avg ME (F77/C++) = 2.0288199088536203 +Relative difference = 4.4925808981097166e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.332294e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.809853e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.809853e+05 ) sec^-1 -MeanMatrixElemValue = 
( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.053683 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.898283e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.586422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.586422e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.473160 sec INFO: No Floating Point Exceptions have been reported - 5,951,415,517 cycles # 2.891 GHz - 14,470,123,335 instructions # 2.43 insn per cycle - 2.059025817 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) + 4,987,746,148 cycles:u # 3.368 GHz (74.67%) + 7,158,673 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.90%) + 1,207,982,497 stalled-cycles-backend:u # 24.22% backend cycles idle (75.16%) + 13,978,550,667 instructions:u # 2.80 insn per cycle + # 0.09 stalled cycles per insn (75.16%) + 1.484769395 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2487) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193583255634 -Relative difference = 1.7661780742548925e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198892958462 +Relative difference = 5.4565783974899003e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.326940e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.191185e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
8.191185e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.518155 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.022259e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.129743e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.129743e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.171796 sec INFO: No Floating Point Exceptions have been reported - 4,152,217,913 cycles # 2.727 GHz - 8,874,854,960 instructions # 2.14 insn per cycle - 1.523530355 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3574) (512y: 0) (512z: 0) + 3,896,994,070 cycles:u # 3.303 GHz (74.94%) + 7,825,414 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.92%) + 1,415,466,671 stalled-cycles-backend:u # 36.32% backend cycles idle (74.92%) + 8,569,308,194 instructions:u # 2.20 insn per cycle + # 0.17 stalled cycles per insn (74.92%) + 1.182924741 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3422) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182107033208 -Relative difference = 1.0385521077446488e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186836987734 +Relative difference = 1.559041129563128e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.326335e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.192412e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.192412e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.518142 sec -INFO: No Floating Point Exceptions have been reported - 4,138,145,120 cycles # 2.717 GHz - 8,411,511,000 instructions # 2.03 insn per cycle - 1.523559219 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 3319) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182107033208 -Relative difference = 1.0385521077446488e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.337364e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.777859e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.777859e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.053123 sec -INFO: No Floating Point Exceptions have been reported - 3,784,038,038 cycles # 1.840 GHz - 7,702,433,783 instructions # 2.04 insn 
per cycle - 2.058532499 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3440) (512y: 0) (512z: 2107) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183204829693 -Relative difference = 1.5796536184903122e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index ff88d5da2d..970ea8edab 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:36:32 +DATE: 2024-05-16_15:57:14 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.198792e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.180605e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.275668e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.521467 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.795249e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.009788e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.065293e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) 
GeV^0 +TOTAL : 0.461697 sec INFO: No Floating Point Exceptions have been reported - 2,143,649,339 cycles # 2.843 GHz - 3,098,162,725 instructions # 1.45 insn per cycle - 0.810608393 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 895,448,377 cycles:u # 2.024 GHz (75.08%) + 2,236,641 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.45%) + 6,321,635 stalled-cycles-backend:u # 0.71% backend cycles idle (75.11%) + 1,449,839,670 instructions:u # 1.62 insn per cycle + # 0.00 stalled cycles per insn (75.29%) + 0.518730258 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 +Avg ME (F77/GPU) = 2.0288063423243869 +Relative difference = 3.241686434838304e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` 
= 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.033714e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.092456e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.092456e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.262850 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.180427e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.235310e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.235310e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 5.012988 sec INFO: No Floating Point Exceptions have been reported - 15,278,986,093 cycles # 2.901 GHz - 38,575,389,182 instructions # 2.52 insn per cycle - 5.268064562 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) + 15,188,686,492 cycles:u # 3.022 GHz (75.00%) + 9,992,311 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.01%) + 322,116,034 stalled-cycles-backend:u # 2.12% backend cycles idle (75.01%) + 39,218,384,975 instructions:u # 2.58 insn per cycle + # 0.01 stalled cycles per insn (75.01%) + 5.042832677 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 740) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.527314e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.723139e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.723139e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.080390 sec +OMP threads / `nproc 
--all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.932926e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.126192e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126192e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.869194 sec INFO: No Floating Point Exceptions have been reported - 8,961,614,258 cycles # 2.906 GHz - 24,226,315,758 instructions # 2.70 insn per cycle - 3.085434765 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) + 8,570,803,625 cycles:u # 2.974 GHz (74.98%) + 9,411,933 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.02%) + 1,791,447,510 stalled-cycles-backend:u # 20.90% backend cycles idle (75.03%) + 24,011,968,827 instructions:u # 2.80 insn per cycle + # 0.07 stalled cycles per insn (75.03%) + 2.887958027 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2102) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.613394e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.100134e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.100134e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.976346 sec +OMP threads / `nproc 
--all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.935252e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.478607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.478607e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.715190 sec INFO: No Floating Point Exceptions have been reported - 5,394,338,439 cycles # 2.724 GHz - 11,277,527,499 instructions # 2.09 insn per cycle - 1.981499886 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) + 5,004,335,960 cycles:u # 2.896 GHz (75.01%) + 8,270,955 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.01%) + 1,428,725,699 stalled-cycles-backend:u # 28.55% backend cycles idle (75.00%) + 11,394,010,172 instructions:u # 2.28 insn per cycle + # 0.13 stalled cycles per insn (75.00%) + 1.731479036 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 +Avg ME (F77/C++) = 2.0288063930599014 +Relative difference = 2.9916108265801754e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.276948e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.897611e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.897611e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.778784 sec -INFO: No Floating Point Exceptions have been reported - 4,855,499,941 cycles # 2.723 GHz - 10,526,571,188 instructions # 2.17 insn per cycle - 1.784170390 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 2167) (512y: 148) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.815864e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.036087e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.036087e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.856419 sec -INFO: No Floating Point Exceptions have been reported - 5,199,981,370 cycles # 1.818 GHz - 7,603,665,117 instructions # 1.46 insn per cycle - 2.861804972 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 1d76304278..dcef71b8d4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_14:36:55 +DATE: 2024-05-16_15:57:29 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.208651e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.184994e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.280716e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.522551 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.814587e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.921539e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.974776e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) 
GeV^0 +TOTAL : 0.455134 sec INFO: No Floating Point Exceptions have been reported - 2,145,230,616 cycles # 2.840 GHz - 3,093,123,772 instructions # 1.44 insn per cycle - 0.812278354 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 885,238,248 cycles:u # 2.013 GHz (74.14%) + 2,253,424 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.15%) + 6,274,029 stalled-cycles-backend:u # 0.71% backend cycles idle (75.38%) + 1,416,790,914 instructions:u # 1.60 insn per cycle + # 0.00 stalled cycles per insn (76.34%) + 0.512623577 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 +Avg ME (F77/GPU) = 2.0288063423243869 +Relative difference = 3.241686434838304e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` 
= 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.021911e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.079930e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.079930e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.294031 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.434028e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.498307e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.498307e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.501425 sec INFO: No Floating Point Exceptions have been reported - 15,341,153,400 cycles # 2.896 GHz - 40,370,282,827 instructions # 2.63 insn per cycle - 5.299425936 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) + 15,391,438,023 cycles:u # 3.410 GHz (75.01%) + 9,507,342 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) + 18,410,071 stalled-cycles-backend:u # 0.12% backend cycles idle (75.01%) + 40,052,703,727 instructions:u # 2.60 insn per cycle + # 0.00 stalled cycles per insn (75.01%) + 4.526661105 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.710012e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.926494e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.926494e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.935201 sec +OMP threads / `nproc 
--all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.548993e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.785959e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.785959e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.484330 sec INFO: No Floating Point Exceptions have been reported - 8,515,314,447 cycles # 2.897 GHz - 23,253,613,819 instructions # 2.73 insn per cycle - 2.940392108 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) + 8,410,602,245 cycles:u # 3.370 GHz (75.01%) + 9,858,507 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.00%) + 393,451,624 stalled-cycles-backend:u # 4.68% backend cycles idle (75.00%) + 23,510,306,264 instructions:u # 2.80 insn per cycle + # 0.02 stalled cycles per insn (75.00%) + 2.513065225 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1993) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.780066e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.132607e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.132607e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.302191 sec +OMP threads / `nproc 
--all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.841073e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.308539e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.308539e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.712883 sec INFO: No Floating Point Exceptions have been reported - 6,262,262,467 cycles # 2.715 GHz - 12,962,490,062 instructions # 2.07 insn per cycle - 2.307689771 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) + 5,702,695,154 cycles:u # 3.306 GHz (74.99%) + 11,257,877 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.95%) + 757,110,103 stalled-cycles-backend:u # 13.28% backend cycles idle (74.95%) + 13,044,940,417 instructions:u # 2.29 insn per cycle + # 0.06 stalled cycles per insn (74.97%) + 1.728031458 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2711) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 +Avg ME (F77/C++) = 2.0288063930599014 +Relative difference = 2.9916108265801754e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.109643e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.511847e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.511847e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.160493 sec -INFO: No Floating Point Exceptions have been reported - 5,903,466,716 cycles # 2.727 GHz - 12,238,680,442 instructions # 2.07 insn per cycle - 2.165768560 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 2209) (512y: 296) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.507940e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.694154e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.694154e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.097205 sec -INFO: No Floating Point Exceptions have been reported - 5,614,268,818 cycles # 1.810 GHz - 8,744,074,840 instructions # 1.56 insn per cycle - 3.102417520 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 1d7490861d..c3705c5c6b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:37:19 +DATE: 2024-05-16_15:57:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.992211e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.047041e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.061161e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.469322 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.891318e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.028000e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.031492e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 
2.725298e+03 ) GeV^-2 +TOTAL : 0.390466 sec INFO: No Floating Point Exceptions have been reported - 1,970,950,644 cycles # 2.853 GHz - 2,836,233,202 instructions # 1.44 insn per cycle - 0.747868437 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,063,597,301 cycles:u # 2.575 GHz (74.08%) + 2,246,178 stalled-cycles-frontend:u # 0.21% frontend cycles idle (73.31%) + 5,976,134 stalled-cycles-backend:u # 0.56% backend cycles idle (74.43%) + 1,546,122,425 instructions:u # 1.45 insn per cycle + # 0.00 stalled cycles per insn (74.52%) + 0.434332130 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.129686e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.329949e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.341716e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.607217 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.616703e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.842455e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.847657e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.624660 sec INFO: No Floating Point Exceptions have been reported - 2,397,125,482 cycles # 2.825 GHz - 3,658,262,516 instructions # 1.53 insn per cycle - 0.909559944 seconds time elapsed + 1,725,391,280 cycles:u # 2.688 GHz (74.98%) + 2,211,883 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.19%) + 5,501,560 stalled-cycles-backend:u # 0.32% backend cycles idle (75.11%) + 2,048,112,265 instructions:u # 1.19 insn per cycle + # 0.00 stalled cycles per insn (75.11%) + 0.673690421 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 +Avg ME (F77/GPU) = 1.4131213684418642 +Relative difference = 4.4692399933517674e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads 
/ `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.379379e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.391311e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.391311e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.910347 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.966593e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.979040e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.979040e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.551061 sec INFO: No Floating Point Exceptions have been reported - 19,789,020,586 cycles # 2.863 GHz - 59,609,829,111 instructions # 3.01 insn per cycle - 6.914699001 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) + 19,445,623,666 cycles:u # 3.501 GHz (74.94%) + 2,647,514 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.94%) + 3,109,224,911 stalled-cycles-backend:u # 15.99% backend cycles idle (74.95%) + 57,883,703,782 instructions:u # 2.98 insn per cycle + # 0.05 stalled cycles per insn (75.02%) + 5.557879289 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1134) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432431 +Relative difference = 4.4692302355460254e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.619966e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.665049e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.665049e+04 ) sec^-1 
-MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.569460 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 5.963083e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.012897e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.012897e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.775187 sec INFO: No Floating Point Exceptions have been reported - 10,374,266,250 cycles # 2.904 GHz - 30,674,256,165 instructions # 2.96 insn per cycle - 3.573646642 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) + 9,655,531,377 cycles:u # 3.475 GHz (74.97%) + 2,446,914 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.96%) + 2,489,896,593 stalled-cycles-backend:u # 25.79% backend cycles idle (74.96%) + 29,866,888,415 instructions:u # 3.09 insn per cycle + # 0.08 stalled cycles per insn (74.96%) + 2.781688940 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4742) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.120184e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.293257e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.293257e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.820051 sec +OMP threads / 
`nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.233857e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.254984e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.254984e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.355463 sec INFO: No Floating Point Exceptions have been reported - 4,901,380,147 cycles # 2.688 GHz - 11,019,047,598 instructions # 2.25 insn per cycle - 1.824311195 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) + 4,723,300,885 cycles:u # 3.473 GHz (75.06%) + 1,988,659 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.87%) + 1,565,675,561 stalled-cycles-backend:u # 33.15% backend cycles idle (74.72%) + 11,209,857,831 instructions:u # 2.37 insn per cycle + # 0.14 stalled cycles per insn (74.67%) + 1.374600251 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4396) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.028182e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.049956e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.049956e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.616748 sec -INFO: No Floating Point Exceptions have been reported - 4,378,615,331 cycles # 2.702 GHz - 10,296,117,856 instructions # 2.35 insn per cycle - 1.621129053 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.954224e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.056280e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.056280e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.381742 sec -INFO: No Floating Point Exceptions have been reported - 4,108,596,097 cycles # 1.723 GHz - 5,842,404,115 instructions # 1.42 insn per cycle - 2.385936782 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 45a1ef164b..de130d0971 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,250 +1,204 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:01:40 +DATE: 2024-05-16_16:27:39 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.535443e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.780857e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.780857e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.503670 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.477984e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.990536e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.990536e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 0.591147 sec INFO: No Floating Point Exceptions have been reported - 2,012,376,201 cycles # 2.812 GHz - 3,006,218,540 instructions # 1.49 insn per cycle - 0.774572160 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,657,650,273 cycles:u # 2.808 GHz (74.93%) + 6,517,743 stalled-cycles-frontend:u # 0.39% frontend cycles idle (75.03%) + 294,023,494 stalled-cycles-backend:u # 17.74% backend cycles idle (75.71%) + 2,018,685,371 instructions:u # 1.22 insn per cycle + # 0.15 stalled cycles per insn (75.16%) + 0.639012900 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.606024e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.624765e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.624765e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.841754 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.209566e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.681342e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.681342e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.948724e+03 +- 1.840727e+03 ) GeV^-2 +TOTAL : 1.285664 sec INFO: No Floating Point Exceptions have been reported - 3,099,668,806 cycles # 2.832 GHz - 4,993,276,525 instructions # 1.61 insn per cycle - 1.155254157 seconds time elapsed + 3,837,444,034 cycles:u # 2.925 GHz (75.01%) + 16,902,808 stalled-cycles-frontend:u # 0.44% frontend cycles idle (75.01%) + 854,341,995 stalled-cycles-backend:u # 22.26% backend cycles idle (75.09%) + 3,891,087,275 instructions:u # 1.01 insn per cycle + # 0.22 stalled cycles per insn (74.98%) + 1.342450877 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 +Avg ME (F77/GPU) = 1.4131213684418642 +Relative difference = 4.4692399933517674e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.380068e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.392068e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.392068e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.915910 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.930505e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.942916e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.942916e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.623528 sec INFO: No Floating Point Exceptions have been reported - 19,806,579,322 cycles # 2.863 GHz - 59,611,012,266 instructions # 3.01 insn per cycle - 6.920308116 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) + 19,574,655,100 cycles:u # 3.478 GHz (74.98%) + 2,123,896 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) + 3,108,477,010 stalled-cycles-backend:u # 15.88% backend cycles idle (74.98%) + 57,822,618,304 instructions:u # 2.95 insn per cycle + # 0.05 stalled cycles per insn (74.98%) + 5.630887153 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1134) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432431 +Relative difference = 4.4692302355460254e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.550339e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.594733e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.594733e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.631431 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.012452e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.062835e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.062835e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.757025 sec INFO: No Floating Point Exceptions have been reported - 10,404,134,292 cycles # 2.862 GHz - 30,722,305,980 instructions # 2.95 insn per cycle - 3.635916319 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) + 9,670,746,360 cycles:u # 3.503 GHz (74.82%) + 2,470,367 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.92%) + 2,490,285,908 stalled-cycles-backend:u # 25.75% backend cycles idle (75.06%) + 29,902,537,167 instructions:u # 3.09 insn per cycle + # 0.08 stalled cycles per insn (75.08%) + 2.763871519 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4742) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.991824e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.166141e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.166141e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.853778 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.229766e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.250819e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.250819e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.364425 sec INFO: No Floating Point Exceptions have been reported - 4,943,570,309 cycles # 2.661 GHz - 11,067,752,215 instructions # 2.24 insn per cycle - 1.858370590 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) + 4,761,520,184 cycles:u # 3.479 GHz (74.91%) + 2,281,684 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.87%) + 1,565,249,273 stalled-cycles-backend:u # 32.87% backend cycles idle (74.87%) + 11,208,960,590 instructions:u # 2.35 insn per cycle + # 0.14 stalled cycles per insn (74.86%) + 1.371395791 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4396) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.005140e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.026682e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.026682e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.662867 sec -INFO: No Floating Point Exceptions have been reported - 4,426,260,539 cycles # 2.656 GHz - 10,346,882,831 instructions # 2.34 insn per cycle - 1.667431238 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.832038e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.932754e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.932754e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.431611 sec -INFO: No Floating Point Exceptions have been reported - 4,145,808,516 cycles # 1.702 GHz - 5,880,428,508 instructions # 1.42 insn per cycle - 2.436095886 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index c8d4c1d012..4db177d1cc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:37:45 +DATE: 2024-05-16_15:58:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.984938e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.044546e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.056865e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.468609 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.740149e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.002618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.006335e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 
2.725298e+03 ) GeV^-2 +TOTAL : 0.434871 sec INFO: No Floating Point Exceptions have been reported - 1,981,002,182 cycles # 2.846 GHz - 2,842,945,772 instructions # 1.44 insn per cycle - 0.752497111 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,055,077,000 cycles:u # 2.553 GHz (75.94%) + 2,245,399 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.77%) + 5,490,265 stalled-cycles-backend:u # 0.52% backend cycles idle (74.87%) + 1,496,542,639 instructions:u # 1.42 insn per cycle + # 0.00 stalled cycles per insn (74.48%) + 0.482545681 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.119070e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.315352e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326681e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.601408 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.534209e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.812283e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.817336e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.620606 sec INFO: No Floating Point Exceptions have been reported - 2,383,936,937 cycles # 2.851 GHz - 3,651,729,049 instructions # 1.53 insn per cycle - 0.896728355 seconds time elapsed + 1,701,929,126 cycles:u # 2.681 GHz (75.15%) + 2,195,017 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.26%) + 5,014,423 stalled-cycles-backend:u # 0.29% backend cycles idle (74.92%) + 2,070,616,173 instructions:u # 1.22 insn per cycle + # 0.00 stalled cycles per insn (74.34%) + 0.670567962 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 +Avg ME (F77/GPU) = 1.4131213684418642 +Relative difference = 4.4692399933517674e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads 
/ `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.454763e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.467389e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.467389e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.698212 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.959702e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.972085e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.972085e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.563715 sec INFO: No Floating Point Exceptions have been reported - 19,500,935,732 cycles # 2.911 GHz - 58,799,003,967 instructions # 3.02 insn per cycle - 6.702449206 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) + 19,472,101,807 cycles:u # 3.497 GHz (75.00%) + 2,608,263 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) + 3,636,439,555 stalled-cycles-backend:u # 18.68% backend cycles idle (75.00%) + 57,674,794,693 instructions:u # 2.96 insn per cycle + # 0.06 stalled cycles per insn (75.00%) + 5.602683884 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1087) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432431 +Relative difference = 4.4692302355460254e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.669930e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.715854e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.715854e+04 ) sec^-1 
-MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.531511 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.009055e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.059724e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.059724e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.754182 sec INFO: No Floating Point Exceptions have been reported - 10,228,095,464 cycles # 2.894 GHz - 30,347,180,891 instructions # 2.97 insn per cycle - 3.535798492 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) + 9,655,627,230 cycles:u # 3.501 GHz (74.89%) + 2,380,586 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) + 2,402,259,382 stalled-cycles-backend:u # 24.88% backend cycles idle (75.06%) + 30,249,149,231 instructions:u # 3.13 insn per cycle + # 0.08 stalled cycles per insn (75.05%) + 2.765687472 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4806) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.789972e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.950829e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.950829e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.887432 sec +OMP threads / 
`nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.218546e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.239135e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.239135e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.371826 sec INFO: No Floating Point Exceptions have been reported - 5,055,118,079 cycles # 2.674 GHz - 11,484,444,983 instructions # 2.27 insn per cycle - 1.891612421 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) + 4,796,137,030 cycles:u # 3.487 GHz (75.03%) + 2,128,645 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.99%) + 1,512,643,237 stalled-cycles-backend:u # 31.54% backend cycles idle (74.99%) + 11,610,627,473 instructions:u # 2.42 insn per cycle + # 0.13 stalled cycles per insn (74.99%) + 1.378449176 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4489) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.667837e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.860484e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.860484e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.718788 sec -INFO: No Floating Point Exceptions have been reported - 4,655,858,880 cycles # 2.704 GHz - 10,842,096,596 instructions # 2.33 insn per cycle - 1.722993406 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.981237e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.082937e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.082937e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.372571 sec -INFO: No Floating Point Exceptions have been reported - 4,129,142,877 cycles # 1.738 GHz - 6,106,185,085 instructions # 1.48 insn per cycle - 2.376879303 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index e4bc7cf2cc..d11c60b43f 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:38:10 +DATE: 2024-05-16_15:58:16 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.514552e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.271085e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.366020e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.450662 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.721105e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.902816e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.946296e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.415474e+04 +- 
1.288238e+04 ) GeV^-2 +TOTAL : 0.373672 sec INFO: No Floating Point Exceptions have been reported - 1,888,418,045 cycles # 2.834 GHz - 2,686,004,303 instructions # 1.42 insn per cycle - 0.722549365 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 808,129,453 cycles:u # 2.344 GHz (73.73%) + 2,150,599 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.86%) + 5,396,964 stalled-cycles-backend:u # 0.67% backend cycles idle (74.68%) + 1,382,264,296 instructions:u # 1.71 insn per cycle + # 0.00 stalled cycles per insn (75.08%) + 0.427180204 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.424662e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.459806e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.527254e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.495261 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.377466e+07 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.631328e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.636252e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.619620e+05 +- 1.611328e+05 ) GeV^-2 +TOTAL : 0.447870 sec INFO: No Floating Point Exceptions have been reported - 2,099,817,827 cycles # 2.862 GHz - 2,990,738,948 instructions # 1.42 insn per cycle - 0.790419941 seconds time elapsed + 1,170,076,475 cycles:u # 2.573 GHz (75.47%) + 2,155,939 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.13%) + 4,646,286 stalled-cycles-backend:u # 0.40% backend cycles idle (75.63%) + 1,543,400,318 instructions:u # 1.32 insn per cycle + # 0.00 stalled cycles per insn (75.96%) + 0.493199609 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412608e+00 -Avg ME (F77/GPU) = 1.4132214346515752 -Relative difference = 0.00043425681546129636 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.412404e+00 +Avg ME (F77/GPU) = 1.4131669530965212 +Relative difference = 0.0005401804983001964 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.505220e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.518346e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.518346e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.562288 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.230818e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.246017e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.246017e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 5.097129 sec INFO: No Floating Point Exceptions have been reported - 19,080,957,547 cycles # 2.906 GHz - 58,959,648,789 instructions # 3.09 insn per cycle - 6.566573323 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) + 17,870,197,691 cycles:u # 3.503 GHz (74.92%) + 2,354,191 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.91%) + 3,156,273,545 stalled-cycles-backend:u # 17.66% backend cycles idle (74.96%) + 55,170,458,904 instructions:u # 3.09 insn per cycle + # 0.06 stalled cycles per insn (75.04%) + 5.103820232 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1229) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129858051842916 -Relative difference = 1.3787518662898538e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412998e+00 +Avg ME (F77/C++) = 1.4129978146120550 +Relative difference = 1.3120184529301602e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.204155e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.352745e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 8.352745e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.018056 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.072396e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.089312e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089312e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 +TOTAL : 1.553153 sec INFO: No Floating Point Exceptions have been reported - 5,861,245,947 cycles # 2.899 GHz - 16,693,370,121 instructions # 2.85 insn per cycle - 2.022246601 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) + 5,434,417,773 cycles:u # 3.491 GHz (74.87%) + 2,070,185 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.82%) + 1,714,095,174 stalled-cycles-backend:u # 31.54% backend cycles idle (74.82%) + 16,105,306,032 instructions:u # 2.96 insn per cycle + # 0.11 stalled cycles per insn (74.82%) + 1.559534972 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5205) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412987e+00 -Avg ME (F77/C++) = 1.4129865669244737 -Relative difference = 3.06496469061158e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129857118325333 +Relative difference = 2.039421953066926e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.747206e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.811751e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.811751e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.959718 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.374359e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.455254e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.455254e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 +TOTAL : 0.715090 sec INFO: No Floating Point Exceptions have been reported - 2,597,973,759 cycles # 2.697 GHz - 5,979,816,432 instructions # 2.30 insn per cycle - 0.963957244 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) + 2,493,118,731 cycles:u # 3.470 GHz (74.40%) + 1,981,459 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.58%) + 803,670,261 stalled-cycles-backend:u # 32.24% backend cycles idle (75.14%) + 6,030,919,708 instructions:u # 2.42 insn per cycle + # 0.13 stalled cycles per insn (75.51%) + 0.721282507 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4878) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 +Avg ME (F77/C++) = 1.4133162680784324 +Relative difference = 1.896804623606238e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.928786e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.008064e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.008064e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.871454 sec -INFO: No Floating Point Exceptions have been reported - 2,346,801,151 cycles # 2.682 GHz - 5,601,970,539 instructions # 2.39 insn per cycle - 0.875813732 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 
0) (avx2: 4645) (512y: 36) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.412327e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.455439e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.455439e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.184240 sec -INFO: No Floating Point Exceptions have been reported - 2,059,493,323 cycles # 1.734 GHz - 3,333,364,881 instructions # 1.62 insn per cycle - 
1.188531798 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index d735dc5897..b4c1f4a171 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,250 +1,204 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:02:06 +DATE: 2024-05-16_16:27:56 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.750186e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.085490e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.085490e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.469338 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.309309e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.780084e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.780084e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.755516e+02 +- 2.671055e+02 ) GeV^-2 +TOTAL : 0.496803 sec INFO: No Floating Point Exceptions have been reported - 1,918,362,944 cycles # 2.804 GHz - 2,834,169,916 instructions # 1.48 insn per cycle - 0.742178075 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,438,489,170 cycles:u # 2.782 GHz (75.08%) + 6,397,869 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.57%) + 286,798,330 stalled-cycles-backend:u # 19.94% backend cycles idle (74.51%) + 1,895,246,735 instructions:u # 1.32 insn per cycle + # 0.15 stalled cycles per insn (75.47%) + 0.542440996 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.524122e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.570005e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.570005e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.651816 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.138496e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.474548e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.474548e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.855934e+03 +- 1.791981e+03 ) GeV^-2 +TOTAL : 1.066962 sec INFO: No Floating Point Exceptions have been reported - 2,503,160,784 cycles # 2.822 GHz - 3,832,792,162 instructions # 1.53 insn per cycle - 0.943470239 seconds time elapsed + 3,242,368,022 cycles:u # 2.975 GHz (75.14%) + 16,804,648 stalled-cycles-frontend:u # 0.52% frontend cycles idle (75.06%) + 861,623,771 stalled-cycles-backend:u # 26.57% backend cycles idle (75.25%) + 3,452,903,859 instructions:u # 1.06 insn per cycle + # 0.25 stalled cycles per insn (75.25%) + 1.137885372 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412608e+00 -Avg ME (F77/GPU) = 1.4132214346515752 -Relative difference = 0.00043425681546129636 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.412404e+00 +Avg ME (F77/GPU) = 1.4131669530965212 +Relative difference = 0.0005401804983001964 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.465694e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.479110e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.479110e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.671662 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.209957e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.225050e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.225050e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 5.132600 sec INFO: No Floating Point Exceptions have been reported - 19,108,337,453 cycles # 2.863 GHz - 58,967,331,894 instructions # 3.09 insn per cycle - 6.675976597 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) + 17,861,632,443 cycles:u # 3.477 GHz (74.93%) + 2,252,442 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.94%) + 3,175,691,457 stalled-cycles-backend:u # 17.78% backend cycles idle (75.02%) + 55,190,690,458 instructions:u # 3.09 insn per cycle + # 0.06 stalled cycles per insn (75.08%) + 5.145976347 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1229) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129858051842916 -Relative difference = 1.3787518662898538e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412998e+00 +Avg ME (F77/C++) = 1.4129978146120550 +Relative difference = 1.3120184529301602e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.093089e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.238027e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.238027e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.051178 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.076539e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.093727e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.093727e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 +TOTAL : 1.550391 sec INFO: No Floating Point Exceptions have been reported - 5,880,119,320 cycles # 2.862 GHz - 16,741,679,626 instructions # 2.85 insn per cycle - 2.055508197 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) + 5,413,097,028 cycles:u # 3.484 GHz (74.83%) + 2,034,925 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.78%) + 1,689,135,957 stalled-cycles-backend:u # 31.20% backend cycles idle (74.77%) + 16,170,572,616 instructions:u # 2.99 insn per cycle + # 0.10 stalled cycles per insn (74.93%) + 1.557344480 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5205) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412987e+00 -Avg ME (F77/C++) = 1.4129865669244737 -Relative difference = 3.06496469061158e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129857118325333 +Relative difference = 2.039421953066926e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.718905e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.782305e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.782305e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.980043 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.363212e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.443546e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.443546e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 +TOTAL : 0.721122 sec INFO: No Floating Point Exceptions have been reported - 2,616,418,693 cycles # 2.660 GHz - 6,017,096,104 instructions # 2.30 insn per cycle - 0.984343134 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) + 2,497,750,595 cycles:u # 3.442 GHz (74.65%) + 1,839,004 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.64%) + 812,027,393 stalled-cycles-backend:u # 32.51% backend cycles idle (74.81%) + 6,085,428,374 instructions:u # 2.44 insn per cycle + # 0.13 stalled cycles per insn (75.36%) + 0.746413862 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4878) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 +Avg ME (F77/C++) = 1.4133162680784324 +Relative difference = 1.896804623606238e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.912882e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.991175e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.991175e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.883189 sec -INFO: No Floating Point Exceptions have been reported - 2,365,822,002 cycles # 2.667 GHz - 5,638,771,692 instructions # 2.38 insn per cycle - 0.887626463 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.399129e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.441231e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.441231e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.200076 sec -INFO: No Floating Point Exceptions have been reported - 2,081,452,605 cycles # 1.729 GHz - 3,374,965,036 instructions # 1.62 insn per cycle - 1.204429196 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 3d41e21b12..b38fc5c0b0 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:38:31 +DATE: 2024-05-16_15:58:29 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.548366e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.290418e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.382374e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.453301 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.319448e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.870912e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.912701e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.415474e+04 +- 
1.288238e+04 ) GeV^-2 +TOTAL : 0.332063 sec INFO: No Floating Point Exceptions have been reported - 1,884,361,235 cycles # 2.811 GHz - 2,662,129,036 instructions # 1.41 insn per cycle - 0.727401829 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 820,248,244 cycles:u # 2.375 GHz (74.42%) + 2,123,048 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.55%) + 5,486,256 stalled-cycles-backend:u # 0.67% backend cycles idle (74.45%) + 1,344,668,467 instructions:u # 1.64 insn per cycle + # 0.00 stalled cycles per insn (74.74%) + 0.378354518 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.381856e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.386346e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.451907e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.498921 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.406311e+07 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.691504e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.696906e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.619620e+05 +- 1.611328e+05 ) GeV^-2 +TOTAL : 0.433658 sec INFO: No Floating Point Exceptions have been reported - 2,065,776,106 cycles # 2.820 GHz - 3,002,526,593 instructions # 1.45 insn per cycle - 0.789720140 seconds time elapsed + 1,194,190,695 cycles:u # 2.611 GHz (74.01%) + 2,261,387 stalled-cycles-frontend:u # 0.19% frontend cycles idle (73.79%) + 4,605,825 stalled-cycles-backend:u # 0.39% backend cycles idle (74.65%) + 1,598,291,024 instructions:u # 1.34 insn per cycle + # 0.00 stalled cycles per insn (76.16%) + 0.511043637 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412608e+00 -Avg ME (F77/GPU) = 1.4132214346515752 -Relative difference = 0.00043425681546129636 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.412404e+00 +Avg ME (F77/GPU) = 1.4131669531526541 +Relative difference = 0.0005401805380429868 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.479714e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.492704e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.492704e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.641350 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.257194e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.272653e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.272653e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 5.057627 sec INFO: No Floating Point Exceptions have been reported - 18,978,826,784 cycles # 2.861 GHz - 58,704,221,037 instructions # 3.09 insn per cycle - 6.645410970 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) + 17,743,836,861 cycles:u # 3.506 GHz (74.94%) + 2,164,346 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) + 2,715,044,606 stalled-cycles-backend:u # 15.30% backend cycles idle (75.03%) + 54,919,157,247 instructions:u # 3.10 insn per cycle + # 0.05 stalled cycles per insn (75.03%) + 5.067815773 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1171) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129858051842916 -Relative difference = 1.3787518662898538e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412998e+00 +Avg ME (F77/C++) = 1.4129978146120550 +Relative difference = 1.3120184529301602e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.494310e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.651898e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 8.651898e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.950028 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.111347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.129510e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.129510e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 +TOTAL : 1.499352 sec INFO: No Floating Point Exceptions have been reported - 5,589,974,968 cycles # 2.862 GHz - 16,510,304,699 instructions # 2.95 insn per cycle - 1.954264273 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5551) (avx2: 0) (512y: 0) (512z: 0) + 5,239,374,949 cycles:u # 3.486 GHz (75.02%) + 1,722,926 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.98%) + 1,316,643,893 stalled-cycles-backend:u # 25.13% backend cycles idle (74.98%) + 16,173,448,663 instructions:u # 3.09 insn per cycle + # 0.08 stalled cycles per insn (74.98%) + 1.506027235 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5136) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412987e+00 -Avg ME (F77/C++) = 1.4129865669244737 -Relative difference = 3.06496469061158e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129857712652836 +Relative difference = 1.618803841657786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.496639e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.543532e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.543532e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 1.116418 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.125955e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.190690e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.190690e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 +TOTAL : 0.795721 sec INFO: No Floating Point Exceptions have been reported - 2,975,820,242 cycles # 2.657 GHz - 6,633,799,194 instructions # 2.23 insn per cycle - 1.120575232 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5568) (512y: 0) (512z: 0) + 2,773,616,938 cycles:u # 3.471 GHz (75.03%) + 2,135,374 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.98%) + 783,207,993 stalled-cycles-backend:u # 28.24% backend cycles idle (74.98%) + 6,663,402,965 instructions:u # 2.40 insn per cycle + # 0.12 stalled cycles per insn (74.98%) + 0.802305848 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5430) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 +Avg ME (F77/C++) = 1.4133162680784324 +Relative difference = 1.896804623606238e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.615016e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.669374e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.669374e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 1.036246 sec -INFO: No Floating Point Exceptions have been reported - 2,759,204,529 cycles # 2.654 GHz - 6,255,102,481 instructions # 2.27 insn per cycle - 1.040401186 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 
0) (avx2: 5279) (512y: 25) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.286831e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.322123e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.322123e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.300128 sec -INFO: No Floating Point Exceptions have been reported - 2,231,395,652 cycles # 1.715 GHz - 3,699,704,768 instructions # 1.66 insn per cycle - 
1.304305216 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2391) (512y: 29) (512z: 3970) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 18990368c8..5a2a58cd8f 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:38:52 +DATE: 2024-05-16_15:58:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.980776e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.047318e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.059891e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.469526 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.880481e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.022557e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.026202e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 
2.725298e+03 ) GeV^-2 +TOTAL : 0.389904 sec INFO: No Floating Point Exceptions have been reported - 1,950,532,568 cycles # 2.815 GHz - 2,802,706,395 instructions # 1.44 insn per cycle - 0.749158155 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,065,922,222 cycles:u # 2.583 GHz (72.81%) + 2,232,390 stalled-cycles-frontend:u # 0.21% frontend cycles idle (73.88%) + 5,061,186 stalled-cycles-backend:u # 0.47% backend cycles idle (75.21%) + 1,576,301,383 instructions:u # 1.48 insn per cycle + # 0.00 stalled cycles per insn (74.83%) + 0.433335259 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.120585e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.317479e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329114e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.606344 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.584651e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.841453e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.847217e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.617205 sec INFO: No Floating Point Exceptions have been reported - 2,403,151,636 cycles # 2.824 GHz - 3,669,339,361 instructions # 1.53 insn per cycle - 0.910110717 seconds time elapsed + 1,729,612,449 cycles:u # 2.709 GHz (74.43%) + 2,130,932 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.07%) + 5,626,909 stalled-cycles-backend:u # 0.33% backend cycles idle (75.07%) + 2,018,755,349 instructions:u # 1.17 insn per cycle + # 0.00 stalled cycles per insn (74.45%) + 0.665623654 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 +Avg ME (F77/GPU) = 1.4131213755569483 +Relative difference = 4.4188898885662695e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads 
/ `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.348054e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.359694e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.359694e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 7.005029 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.886072e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.897917e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.897917e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.705075 sec INFO: No Floating Point Exceptions have been reported - 20,055,951,018 cycles # 2.863 GHz - 60,536,467,053 instructions # 3.02 insn per cycle - 7.009312607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) + 19,987,696,758 cycles:u # 3.501 GHz (74.93%) + 2,586,000 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.91%) + 3,855,146,728 stalled-cycles-backend:u # 19.29% backend cycles idle (74.98%) + 59,054,891,649 instructions:u # 2.95 insn per cycle + # 0.07 stalled cycles per insn (75.06%) + 5.711965398 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1149) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.638770e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.684822e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.684822e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.556206 sec +OMP threads 
/ `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.068777e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.120306e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.120306e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.727293 sec INFO: No Floating Point Exceptions have been reported - 10,186,602,629 cycles # 2.862 GHz - 30,386,009,701 instructions # 2.98 insn per cycle - 3.560429335 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5280) (avx2: 0) (512y: 0) (512z: 0) + 9,538,124,101 cycles:u # 3.493 GHz (74.79%) + 1,850,579 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.93%) + 2,355,440,883 stalled-cycles-backend:u # 24.70% backend cycles idle (75.10%) + 29,650,788,765 instructions:u # 3.11 insn per cycle + # 0.08 stalled cycles per insn (75.10%) + 2.734010476 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4873) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.050822e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.223334e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.223334e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.833811 sec +OMP threads / 
`nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.251088e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.272896e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.272896e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.337074 sec INFO: No Floating Point Exceptions have been reported - 4,877,548,863 cycles # 2.655 GHz - 10,978,535,397 instructions # 2.25 insn per cycle - 1.838126466 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4624) (512y: 0) (512z: 0) + 4,669,732,847 cycles:u # 3.482 GHz (74.96%) + 2,112,477 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.95%) + 1,554,542,175 stalled-cycles-backend:u # 33.29% backend cycles idle (74.95%) + 11,137,824,946 instructions:u # 2.39 insn per cycle + # 0.14 stalled cycles per insn (74.95%) + 1.351531421 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4581) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.034701e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.056812e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.056812e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.607005 sec -INFO: No Floating Point Exceptions have been reported - 4,285,859,041 cycles # 2.661 GHz - 10,248,085,853 instructions # 2.39 insn per cycle - 1.611327735 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4280) (512y: 82) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.675038e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.769490e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.769490e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.480681 sec -INFO: No Floating Point Exceptions have been reported - 4,211,204,679 cycles # 1.695 GHz - 6,044,041,090 instructions # 1.44 insn per cycle - 2.485018889 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2066) (512y: 117) (512z: 3540) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213786174055 -Relative difference = 4.3972324717191576e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index bea6b18082..38bc847e2e 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_14:39:18 +DATE: 2024-05-16_15:58:59 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.940348e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.041869e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.054764e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.469154 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.388163e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.018679e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.022326e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 
2.725298e+03 ) GeV^-2 +TOTAL : 0.505575 sec INFO: No Floating Point Exceptions have been reported - 1,946,414,728 cycles # 2.818 GHz - 2,803,423,086 instructions # 1.44 insn per cycle - 0.748059256 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,071,722,011 cycles:u # 2.592 GHz (73.97%) + 2,181,864 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.20%) + 5,154,232 stalled-cycles-backend:u # 0.48% backend cycles idle (75.03%) + 1,552,348,456 instructions:u # 1.45 insn per cycle + # 0.00 stalled cycles per insn (74.68%) + 0.551434612 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.116866e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.312173e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.323463e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.604157 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.474005e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.834867e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.839980e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.669724 sec INFO: No Floating Point Exceptions have been reported - 2,374,249,289 cycles # 2.818 GHz - 3,602,148,119 instructions # 1.52 insn per cycle - 0.902621411 seconds time elapsed + 1,712,467,594 cycles:u # 2.689 GHz (74.82%) + 2,209,591 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.90%) + 5,605,006 stalled-cycles-backend:u # 0.33% backend cycles idle (75.17%) + 2,023,028,753 instructions:u # 1.18 insn per cycle + # 0.00 stalled cycles per insn (75.14%) + 0.721236809 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 +Avg ME (F77/GPU) = 1.4131213755569483 +Relative difference = 4.4188898885662695e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads 
/ `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.368504e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.380280e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.380280e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.941323 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.908068e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.920040e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.920040e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.661916 sec INFO: No Floating Point Exceptions have been reported - 19,878,296,626 cycles # 2.863 GHz - 59,936,362,271 instructions # 3.02 insn per cycle - 6.945573140 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) + 19,854,557,786 cycles:u # 3.504 GHz (75.00%) + 2,361,768 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 3,900,457,692 stalled-cycles-backend:u # 19.65% backend cycles idle (75.01%) + 58,617,210,039 instructions:u # 2.95 insn per cycle + # 0.07 stalled cycles per insn (75.01%) + 5.670181282 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1026) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.689994e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.736297e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.736297e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.516340 sec +OMP threads 
/ `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.270664e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.325536e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.325536e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.640204 sec INFO: No Floating Point Exceptions have been reported - 10,077,314,757 cycles # 2.863 GHz - 30,098,117,657 instructions # 2.99 insn per cycle - 3.520635536 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5082) (avx2: 0) (512y: 0) (512z: 0) + 9,230,702,509 cycles:u # 3.491 GHz (74.89%) + 2,254,879 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.89%) + 1,809,915,298 stalled-cycles-backend:u # 19.61% backend cycles idle (74.90%) + 30,150,647,518 instructions:u # 3.27 insn per cycle + # 0.06 stalled cycles per insn (75.02%) + 2.654880754 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4944) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.778247e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.940877e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.940877e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.889938 sec +OMP threads / 
`nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.225853e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.246775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.246775e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.363889 sec INFO: No Floating Point Exceptions have been reported - 5,023,754,472 cycles # 2.654 GHz - 11,483,522,538 instructions # 2.29 insn per cycle - 1.894205310 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4723) (512y: 0) (512z: 0) + 4,768,365,786 cycles:u # 3.487 GHz (74.89%) + 2,082,915 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.85%) + 1,538,018,459 stalled-cycles-backend:u # 32.25% backend cycles idle (74.85%) + 11,628,498,602 instructions:u # 2.44 insn per cycle + # 0.13 stalled cycles per insn (74.85%) + 1.370276155 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4685) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.644687e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.842226e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.842226e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.722080 sec -INFO: No Floating Point Exceptions have been reported - 4,590,091,342 cycles # 2.660 GHz - 10,809,457,257 instructions # 2.35 insn per cycle - 1.726406566 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4285) (512y: 234) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.641517e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.735645e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.735645e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.492729 sec -INFO: No Floating Point Exceptions have been reported - 4,229,101,372 cycles # 1.695 GHz - 6,273,394,761 instructions # 1.48 insn per cycle - 2.496999493 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1961) (512y: 163) (512z: 3617) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213786174055 -Relative difference = 4.3972324717191576e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index adf6424639..f745f0bfe4 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:39:44 +DATE: 2024-05-16_15:59:15 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.453895e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.477096e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.479397e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.529744 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.240694e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.470445e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.471175e+04 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 
+- 2.047289e-02 ) GeV^-4 +TOTAL : 0.647569 sec INFO: No Floating Point Exceptions have been reported - 2,179,317,048 cycles # 2.822 GHz - 3,403,036,461 instructions # 1.56 insn per cycle - 0.830470867 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,917,427,552 cycles:u # 2.918 GHz (74.21%) + 2,068,160 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.28%) + 5,748,928 stalled-cycles-backend:u # 0.30% backend cycles idle (75.52%) + 2,099,178,567 instructions:u # 1.09 insn per cycle + # 0.00 stalled cycles per insn (75.85%) + 0.694887363 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.124157e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.151338e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.152519e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.042150 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.242927e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.245498e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.245555e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 7.695051 sec INFO: No Floating Point Exceptions have been reported - 9,405,604,432 cycles # 2.853 GHz - 20,118,562,201 instructions # 2.14 insn per cycle - 3.353608047 seconds time elapsed + 26,568,387,560 cycles:u # 3.439 GHz (74.88%) + 3,047,402 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 5,977,774 stalled-cycles-backend:u # 0.02% backend cycles idle (75.05%) + 21,140,043,277 instructions:u # 0.80 insn per cycle + # 0.00 stalled cycles per insn (75.05%) + 7.749841650 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.820592e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.821434e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.821434e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.018372 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.959223e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.960008e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.960008e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 8.384500 sec INFO: No Floating Point Exceptions have been reported - 25,614,013,948 cycles # 2.839 GHz - 78,938,013,495 instructions # 3.08 insn per cycle - 9.022664733 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 25,871,962,772 cycles:u # 3.084 GHz (74.96%) + 5,483,137 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) + 3,011,151,668 stalled-cycles-backend:u # 11.64% backend cycles idle (75.02%) + 81,653,061,558 instructions:u # 3.16 insn per cycle + # 0.04 stalled cycles per insn (75.02%) + 8.392660557 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.519494e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.522699e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.522699e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
4.669138 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.436753e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.440754e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.440754e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.710024 sec INFO: No Floating Point Exceptions have been reported - 12,898,966,245 cycles # 2.761 GHz - 39,280,150,365 instructions # 3.05 insn per cycle - 4.673492352 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) + 11,426,095,552 cycles:u # 3.076 GHz (75.02%) + 766,709 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.02%) + 1,377,794,981 stalled-cycles-backend:u # 12.06% backend cycles idle (75.02%) + 39,154,218,221 instructions:u # 3.43 insn per cycle + # 0.04 stalled cycles per insn (75.02%) + 3.718815324 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.859599e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.875346e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.875346e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.097013 
sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.058588e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.060862e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.060862e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.562917 sec INFO: No Floating Point Exceptions have been reported - 5,574,685,577 cycles # 2.655 GHz - 13,685,856,406 instructions # 2.46 insn per cycle - 2.101249976 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) + 4,813,152,911 cycles:u # 3.071 GHz (74.86%) + 414,825 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) + 543,029,817 stalled-cycles-backend:u # 11.28% backend cycles idle (74.99%) + 13,718,682,409 instructions:u # 2.85 insn per cycle + # 0.04 stalled cycles per insn (74.99%) + 1.570777631 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.915800e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.935807e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.935807e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.848754 sec -INFO: No Floating Point Exceptions have been reported - 4,887,101,603 cycles # 2.639 GHz - 12,341,123,817 instructions # 2.53 insn per cycle - 1.853060894 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.728417e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.739729e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.739729e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.446881 sec -INFO: No Floating Point Exceptions have been reported - 4,107,098,137 cycles # 1.676 GHz - 6,336,202,498 
instructions # 1.54 insn per cycle - 2.451096147 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 92636e2555..2ac03592db 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,250 +1,204 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:02:53 +DATE: 2024-05-16_16:28:19 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.094987e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.434034e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.434034e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.523594 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.299483e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.450217e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.450217e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.698110 sec INFO: No Floating Point Exceptions have been reported - 2,118,517,608 cycles # 2.813 GHz - 3,348,276,596 instructions # 1.58 insn per cycle - 0.813391390 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,943,776,735 cycles:u # 2.766 GHz (74.48%) + 2,561,579 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.45%) + 48,587,924 stalled-cycles-backend:u # 2.50% backend cycles idle (75.62%) + 2,213,319,398 instructions:u # 1.14 insn per cycle + # 0.02 stalled cycles per insn (75.15%) + 0.744784576 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.622834e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.121853e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.121853e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.329713 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.205592e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.241191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.241191e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 +TOTAL : 8.610558 sec INFO: No Floating Point Exceptions have been reported - 10,291,111,145 cycles # 2.854 GHz - 21,714,903,322 instructions # 2.11 insn per cycle - 3.660758937 seconds time elapsed + 28,067,417,419 cycles:u # 3.264 GHz (74.91%) + 21,383,596 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.93%) + 1,133,454,846 stalled-cycles-backend:u # 4.04% backend cycles idle (74.97%) + 22,563,060,022 instructions:u # 0.80 insn per cycle + # 0.05 stalled cycles per insn (75.02%) + 8.675229690 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.836126e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.837051e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.837051e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.946025 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.160540e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.161393e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.161393e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 7.606452 sec INFO: No Floating Point Exceptions have been reported - 25,625,027,072 cycles # 2.863 GHz - 78,943,584,564 instructions # 3.08 insn per cycle - 8.950491990 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 26,144,739,034 cycles:u # 3.435 GHz (74.99%) + 24,042,283 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.99%) + 3,442,166,079 stalled-cycles-backend:u # 13.17% backend cycles idle (74.99%) + 81,651,638,110 instructions:u # 3.12 insn per cycle + # 0.04 stalled cycles per insn (74.98%) + 7.622588272 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.512313e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.515690e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.515690e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.682891 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 5.017452e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.022007e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.022007e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.284283 sec INFO: No Floating Point Exceptions have been reported - 12,903,818,271 cycles # 2.754 GHz - 39,293,324,950 instructions # 3.05 insn per cycle - 4.687529036 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) + 11,484,312,105 cycles:u # 3.493 GHz (74.95%) + 847,543 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.94%) + 1,391,797,034 stalled-cycles-backend:u # 12.12% backend cycles idle (74.94%) + 39,232,489,464 instructions:u # 3.42 insn per cycle + # 0.04 stalled cycles per insn (74.94%) + 3.291507424 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.867831e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.884189e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.884189e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.098804 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.197643e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.200220e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.200220e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.384891 sec INFO: No Floating Point Exceptions have been reported - 5,587,651,201 cycles # 2.658 GHz - 13,696,262,775 instructions # 2.45 insn per cycle - 2.103410758 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) + 4,845,546,394 cycles:u # 3.488 GHz (74.68%) + 555,916 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.73%) + 601,492,271 stalled-cycles-backend:u # 12.41% backend cycles idle (75.02%) + 13,720,833,344 instructions:u # 2.83 insn per cycle + # 0.04 stalled cycles per insn (75.24%) + 1.392181254 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.952196e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.973818e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.973818e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.845912 sec -INFO: No Floating Point Exceptions have been reported - 4,903,860,646 cycles # 2.651 GHz - 12,352,108,328 instructions # 2.52 insn per cycle - 1.850421022 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.711524e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.723541e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.723541e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.457485 sec -INFO: No Floating Point Exceptions have been reported - 4,130,677,154 cycles # 1.678 GHz - 6,346,127,118 instructions # 1.54 insn per cycle - 2.462055019 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 07bc3b6c73..b1a908cb26 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:12:42 +DATE: 2024-05-16_16:34:45 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.490501e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.518177e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.520849e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.316994e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.477450e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.478126e+04 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.515008 sec +TOTAL : 0.658412 
sec INFO: No Floating Point Exceptions have been reported - 2,117,861,647 cycles # 2.847 GHz - 3,355,581,223 instructions # 1.58 insn per cycle - 0.805282012 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,952,426,785 cycles:u # 2.906 GHz (74.78%) + 2,611,996 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.90%) + 49,330,094 stalled-cycles-backend:u # 2.53% backend cycles idle (75.08%) + 2,198,477,186 instructions:u # 1.13 insn per cycle + # 0.02 stalled cycles per insn (75.14%) + 0.699140909 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.120060e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.152876e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.154244e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.238511e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.241352e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.241408e+05 ) 
sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.146209 sec +TOTAL : 8.406942 sec INFO: No Floating Point Exceptions have been reported - 9,794,350,225 cycles # 2.878 GHz - 20,567,996,876 instructions # 2.10 insn per cycle - 3.458179285 seconds time elapsed + 28,876,407,216 cycles:u # 3.423 GHz (74.90%) + 11,676,178 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.97%) + 1,120,027,574 stalled-cycles-backend:u # 3.88% backend cycles idle (75.01%) + 22,611,604,116 instructions:u # 0.78 insn per cycle + # 0.05 stalled cycles per insn (75.02%) + 8.461854871 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.854249e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.855163e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.855163e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.189529e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.190387e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.190387e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.856220 sec +TOTAL : 7.502176 sec INFO: No Floating Point Exceptions have been reported - 25,606,958,110 cycles # 2.890 GHz - 78,936,876,492 instructions # 3.08 insn per cycle - 8.860490718 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 26,067,483,019 cycles:u # 3.474 GHz (74.95%) + 24,047,816 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.95%) + 3,393,470,564 stalled-cycles-backend:u # 13.02% backend cycles idle (74.98%) + 81,696,784,837 instructions:u # 3.13 insn per cycle + # 0.04 stalled 
cycles per insn (75.03%) + 7.513935591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow 
summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.547585e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.550823e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.550823e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 5.016907e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.021312e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.021312e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.633866 sec +TOTAL : 3.280765 sec INFO: No Floating Point Exceptions have been reported - 12,886,616,952 cycles # 2.779 GHz - 39,279,548,039 instructions # 3.05 insn per cycle - 4.638052623 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) + 11,488,529,274 cycles:u # 3.499 GHz (74.91%) + 695,887 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.91%) + 1,564,018,994 stalled-cycles-backend:u # 13.61% backend cycles idle (74.91%) + 39,228,204,285 instructions:u # 3.41 insn per cycle + # 0.04 stalled cycles per insn (74.95%) + 3.284992005 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.950793e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.966539e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.966539e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.179905e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.182395e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.182395e+04 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.073967 sec +TOTAL : 1.401951 sec INFO: No Floating Point Exceptions have been reported - 5,577,712,569 cycles # 2.685 GHz - 13,684,498,611 instructions # 2.45 insn per cycle - 2.078154877 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) + 4,819,349,124 cycles:u # 3.431 GHz (74.95%) + 451,954 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.94%) + 596,939,385 stalled-cycles-backend:u # 12.39% backend cycles idle (74.94%) + 13,715,502,656 instructions:u # 2.85 insn per cycle + # 0.04 stalled cycles per insn (74.94%) + 1.406063146 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.068596e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.089664e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.089664e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.820316 sec -INFO: No Floating Point Exceptions have been reported - 4,894,997,970 cycles # 2.684 GHz - 12,339,079,686 instructions # 2.52 insn per cycle - 1.824557454 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.817590e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.829323e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.829323e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.417091 sec -INFO: No Floating Point Exceptions have been reported - 4,131,104,953 cycles # 
1.707 GHz - 6,332,486,091 instructions # 1.53 insn per cycle - 2.421265188 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index b300efd9c0..a0e80c25fb 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,236 +1,195 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:07:12 +DATE: 2024-05-16_16:32:35 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.175456e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.487401e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.489887e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.516778 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.344131e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.490608e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.491379e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.681519 sec INFO: No Floating Point Exceptions have been reported - 2,106,838,284 cycles # 2.817 GHz - 3,334,047,065 instructions # 1.58 insn per cycle - 0.806903831 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,974,279,625 cycles:u # 2.884 GHz (73.78%) + 2,954,441 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.98%) + 35,227,763 stalled-cycles-backend:u # 1.78% backend cycles idle (75.37%) + 2,180,318,281 instructions:u # 1.10 insn per cycle + # 0.02 stalled cycles per insn (75.43%) + 0.724160088 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.725415e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.181222e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.182613e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.208931 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.212854e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.246626e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.246682e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 +TOTAL : 8.555132 sec INFO: No Floating Point Exceptions have been reported - 9,884,616,856 cycles # 2.852 GHz - 22,569,706,597 instructions # 2.28 insn per cycle - 3.521271497 seconds time elapsed + 28,616,243,213 cycles:u # 3.340 GHz (74.87%) + 23,129,446 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.95%) + 1,128,438,495 stalled-cycles-backend:u # 3.94% backend cycles idle (75.04%) + 22,952,283,801 instructions:u # 0.80 insn per cycle + # 0.05 stalled cycles per insn (75.08%) + 8.616154337 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.838805e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.839662e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.839662e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.928794 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.131810e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.132645e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.132645e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 7.706205 sec INFO: No Floating Point Exceptions have been reported - 25,578,535,475 cycles # 2.864 GHz - 78,941,438,017 instructions # 3.09 insn per cycle - 8.932959256 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) + 26,078,373,962 cycles:u # 3.383 GHz (74.99%) + 25,079,225 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.99%) + 3,510,795,188 stalled-cycles-backend:u # 13.46% backend cycles idle (74.99%) + 81,685,715,238 instructions:u # 3.13 insn per cycle + # 0.04 stalled cycles per insn (74.99%) + 7.710674176 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.490750e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.493870e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.493870e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) 
GeV^-4 -TOTAL : 4.708141 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.865797e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.870539e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.870539e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.383410 sec INFO: No Floating Point Exceptions have been reported - 12,873,433,154 cycles # 2.733 GHz - 39,280,620,994 instructions # 3.05 insn per cycle - 4.712353785 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) + 11,476,947,568 cycles:u # 3.389 GHz (74.97%) + 717,345 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) + 1,394,339,634 stalled-cycles-backend:u # 12.15% backend cycles idle (74.96%) + 39,175,337,651 instructions:u # 3.41 insn per cycle + # 0.04 stalled cycles per insn (74.96%) + 3.387986892 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.853620e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.869024e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.869024e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 
-TOTAL : 2.097850 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.152744e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155057e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155057e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.434251 sec INFO: No Floating Point Exceptions have been reported - 5,573,477,429 cycles # 2.652 GHz - 13,685,909,410 instructions # 2.46 insn per cycle - 2.102047066 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) + 4,860,085,854 cycles:u # 3.382 GHz (74.95%) + 414,269 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.95%) + 598,023,717 stalled-cycles-backend:u # 12.30% backend cycles idle (74.95%) + 13,704,018,164 instructions:u # 2.82 insn per cycle + # 0.04 stalled cycles per insn (74.95%) + 1.438315794 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.970623e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.991129e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.991129e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.837793 sec -INFO: No Floating Point Exceptions have been reported - 4,885,535,539 cycles # 2.653 GHz - 12,340,762,979 instructions # 2.53 insn per cycle - 1.841998870 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.715803e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.727367e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.727367e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.451983 sec -INFO: No Floating Point Exceptions have been reported - 4,110,713,398 cycles # 
1.674 GHz - 6,334,867,690 instructions # 1.54 insn per cycle - 2.456147392 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 254c65fd8c..98f971e8da 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:40:17 +DATE: 2024-05-16_15:59:44 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.472040e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.495257e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497568e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.530193 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.383801e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.441172e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.441561e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 
+- 2.047289e-02 ) GeV^-4 +TOTAL : 0.581765 sec INFO: No Floating Point Exceptions have been reported - 2,179,825,483 cycles # 2.820 GHz - 3,416,926,116 instructions # 1.57 insn per cycle - 0.832303660 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,420,717,109 cycles:u # 2.446 GHz (73.59%) + 2,253,860 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.62%) + 6,648,355 stalled-cycles-backend:u # 0.47% backend cycles idle (75.84%) + 1,777,674,227 instructions:u # 1.25 insn per cycle + # 0.00 stalled cycles per insn (74.83%) + 0.643584224 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.149957e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.177471e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.178689e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.025570 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.739960e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.745030e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.745144e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.374862 sec INFO: No Floating Point Exceptions have been reported - 9,343,829,120 cycles # 2.851 GHz - 20,017,847,921 instructions # 2.14 insn per cycle - 3.337093329 seconds time elapsed + 19,281,030,721 cycles:u # 3.009 GHz (74.90%) + 2,979,566 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.92%) + 5,145,908 stalled-cycles-backend:u # 0.03% backend cycles idle (74.99%) + 15,515,306,743 instructions:u # 0.80 insn per cycle + # 0.00 stalled cycles per insn (75.03%) + 6.432094557 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.844549e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.845438e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.845438e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.900941 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.965819e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.966613e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.966613e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 8.355971 sec INFO: No Floating Point Exceptions have been reported - 25,492,945,375 cycles # 2.863 GHz - 78,715,017,784 instructions # 3.09 insn per cycle - 8.905151100 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4264) (avx2: 0) (512y: 0) (512z: 0) + 25,786,448,242 cycles:u # 3.084 GHz (74.95%) + 1,280,141 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) + 2,966,784,019 stalled-cycles-backend:u # 11.51% backend cycles idle (75.03%) + 81,685,731,850 instructions:u # 3.17 insn per cycle + # 0.04 stalled cycles per insn (75.03%) + 8.364397457 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 6589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.432714e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.435728e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.435728e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
4.786568 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 5.039107e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.043696e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.043696e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.266114 sec INFO: No Floating Point Exceptions have been reported - 12,968,671,480 cycles # 2.709 GHz - 39,227,279,421 instructions # 3.02 insn per cycle - 4.790848376 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12951) (avx2: 0) (512y: 0) (512z: 0) + 11,454,279,152 cycles:u # 3.503 GHz (74.91%) + 713,745 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.02%) + 1,542,827,344 stalled-cycles-backend:u # 13.47% backend cycles idle (75.05%) + 39,150,001,362 instructions:u # 3.42 insn per cycle + # 0.04 stalled cycles per insn (75.05%) + 3.272776435 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12771) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.791500e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.806568e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.806568e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.114108 
sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.193623e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.196165e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.196165e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.385365 sec INFO: No Floating Point Exceptions have been reported - 5,617,875,214 cycles # 2.653 GHz - 13,801,216,605 instructions # 2.46 insn per cycle - 2.118326582 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) + 4,853,097,891 cycles:u # 3.494 GHz (74.66%) + 2,985,761 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.84%) + 585,936,021 stalled-cycles-backend:u # 12.07% backend cycles idle (75.13%) + 13,737,946,664 instructions:u # 2.83 insn per cycle + # 0.04 stalled cycles per insn (75.23%) + 1.392136375 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11048) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.808696e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.827867e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.827867e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.871093 sec -INFO: No Floating Point Exceptions have been reported - 4,977,184,975 cycles # 2.656 GHz - 12,467,160,434 instructions # 2.50 insn per cycle - 1.875328468 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.708154e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.719459e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.719459e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.454314 sec -INFO: No Floating Point Exceptions have been reported - 4,118,637,907 cycles # 1.676 GHz - 
6,458,862,875 instructions # 1.57 insn per cycle - 2.458530246 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 452f4e853d..c204930b95 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:53:22 +DATE: 2024-05-16_16:15:57 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.253411e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.278108e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.280152e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.538797 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.333504e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.473180e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.473971e+04 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 
+- 2.047289e-02 ) GeV^-4 +TOTAL : 0.641454 sec INFO: No Floating Point Exceptions have been reported - 2,198,780,840 cycles # 2.857 GHz - 3,392,092,682 instructions # 1.54 insn per cycle - 0.826434194 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,921,374,787 cycles:u # 2.944 GHz (74.31%) + 2,218,794 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.46%) + 5,689,437 stalled-cycles-backend:u # 0.30% backend cycles idle (75.40%) + 2,098,072,606 instructions:u # 1.09 insn per cycle + # 0.00 stalled cycles per insn (75.62%) + 0.686609136 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.756018e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.782691e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.783822e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.316768 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.243251e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.246011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.246068e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 7.730002 sec INFO: No Floating Point Exceptions have been reported - 10,315,360,608 cycles # 2.881 GHz - 23,624,745,879 instructions # 2.29 insn per cycle - 3.638219909 seconds time elapsed + 26,412,911,991 cycles:u # 3.420 GHz (75.01%) + 3,115,612 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.04%) + 6,161,858 stalled-cycles-backend:u # 0.02% backend cycles idle (75.05%) + 21,073,639,603 instructions:u # 0.80 insn per cycle + # 0.00 stalled cycles per insn (75.05%) + 7.782665100 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.179521e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.179964e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.179964e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 39.248920 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.500085e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.500454e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.500454e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 36.455677 sec INFO: No Floating Point Exceptions have been reported - 113,511,319,041 cycles # 2.892 GHz - 144,820,446,927 instructions # 1.28 insn per cycle - 39.253177511 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21353) (avx2: 0) (512y: 0) (512z: 0) + 126,603,172,584 cycles:u # 3.473 GHz (74.99%) + 52,649,839 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.00%) + 16,830,397,272 stalled-cycles-backend:u # 13.29% backend cycles idle (75.01%) + 140,984,420,050 instructions:u # 1.11 insn per cycle + # 0.12 stalled cycles per insn (75.01%) + 36.462809036 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21092) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198140450E-004 -Relative difference = 2.83729918072716e-07 +Avg ME (F77/C++) = 6.6266731198140461E-004 +Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.047626e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.050057e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.050057e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.390362 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.620673e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.623024e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.623024e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 4.541767 sec INFO: No Floating Point Exceptions have been reported - 14,740,564,650 cycles # 2.733 GHz - 37,575,494,329 instructions # 2.55 insn per cycle - 5.394647902 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68119) (avx2: 0) (512y: 0) (512z: 0) + 15,788,847,464 cycles:u # 3.474 GHz (75.01%) + 847,416 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 7,459,545,585 stalled-cycles-backend:u # 47.25% backend cycles idle (75.01%) + 37,454,674,896 instructions:u # 2.37 insn per cycle + # 0.20 stalled cycles per insn (75.01%) + 4.548435574 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68052) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141209E-004 -Relative difference = 2.8372990661989057e-07 +Avg ME (F77/C++) = 6.6266731198141220E-004 +Relative difference = 2.837299064562788e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.230737e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.243892e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
7.243892e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.278344 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.376401e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.385763e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.385763e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.236058 sec INFO: No Floating Point Exceptions have been reported - 6,134,003,628 cycles # 2.689 GHz - 13,061,930,844 instructions # 2.13 insn per cycle - 2.282738143 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) + 7,694,540,236 cycles:u # 3.435 GHz (75.03%) + 4,856,635 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) + 4,403,069,372 stalled-cycles-backend:u # 57.22% backend cycles idle (75.00%) + 12,893,293,961 instructions:u # 1.68 insn per cycle + # 0.34 stalled cycles per insn (75.00%) + 2.242659070 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46593) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 +Avg ME (F77/C++) = 6.6266731198156778E-004 +Relative difference = 2.837296716733571e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.779670e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.799133e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.799133e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.877722 sec -INFO: No Floating Point Exceptions have been reported - 5,068,047,565 cycles # 2.694 GHz - 11,440,450,267 instructions # 2.26 insn per cycle - 1.882139324 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.093705e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.106755e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.106755e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.321771 sec -INFO: No Floating Point Exceptions have been reported - 3,974,444,581 cycles # 1.709 GHz - 
5,942,873,144 instructions # 1.50 insn per cycle - 2.326156002 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 00ea23e18d..82dbeb63c7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:54:29 +DATE: 2024-05-16_16:16:58 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.259147e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.284136e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.286360e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.536782 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.381471e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.432629e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.432855e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 
+- 2.047289e-02 ) GeV^-4 +TOTAL : 0.533772 sec INFO: No Floating Point Exceptions have been reported - 2,193,506,190 cycles # 2.857 GHz - 3,337,314,407 instructions # 1.52 insn per cycle - 0.824492176 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,495,230,532 cycles:u # 2.798 GHz (74.57%) + 2,178,277 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.79%) + 5,373,727 stalled-cycles-backend:u # 0.36% backend cycles idle (75.94%) + 1,821,435,799 instructions:u # 1.22 insn per cycle + # 0.00 stalled cycles per insn (76.03%) + 0.579795308 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.761556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.788263e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.789425e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.301197 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.738658e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.743473e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.743590e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.543267 sec INFO: No Floating Point Exceptions have been reported - 10,264,886,616 cycles # 2.886 GHz - 23,377,018,059 instructions # 2.28 insn per cycle - 3.615104997 seconds time elapsed + 21,689,463,641 cycles:u # 3.410 GHz (74.90%) + 2,829,483 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 5,900,164 stalled-cycles-backend:u # 0.03% backend cycles idle (74.97%) + 17,423,262,017 instructions:u # 0.80 insn per cycle + # 0.00 stalled cycles per insn (74.99%) + 6.597249694 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.170908e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.171353e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.171353e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 39.330349 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.357215e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.357558e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.357558e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 37.650036 sec INFO: No Floating Point Exceptions have been reported - 113,688,017,774 cycles # 2.891 GHz - 144,788,018,158 instructions # 1.27 insn per cycle - 39.334720458 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20719) (avx2: 0) (512y: 0) (512z: 0) + 128,096,240,554 cycles:u # 3.402 GHz (75.00%) + 161,153,961 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.99%) + 17,514,129,534 stalled-cycles-backend:u # 13.67% backend cycles idle (74.99%) + 141,505,351,633 instructions:u # 1.10 insn per cycle + # 0.12 stalled cycles per insn (74.99%) + 37.674452732 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21725) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.974783e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.977013e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.977013e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
5.522774 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.399478e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.401773e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.401773e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 4.836726 sec INFO: No Floating Point Exceptions have been reported - 15,220,566,650 cycles # 2.755 GHz - 37,763,046,074 instructions # 2.48 insn per cycle - 5.527045303 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68447) (avx2: 0) (512y: 0) (512z: 0) + 16,174,975,807 cycles:u # 3.342 GHz (74.92%) + 11,299,672 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.00%) + 6,032,416,709 stalled-cycles-backend:u # 37.29% backend cycles idle (75.05%) + 37,536,936,894 instructions:u # 2.32 insn per cycle + # 0.16 stalled cycles per insn (75.05%) + 4.844983395 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68056) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141209E-004 -Relative difference = 2.8372990661989057e-07 +Avg ME (F77/C++) = 6.6266731198141220E-004 +Relative difference = 2.837299064562788e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.412795e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.426610e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
7.426610e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.222010 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.532310e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.542495e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.542495e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.189187 sec INFO: No Floating Point Exceptions have been reported - 6,000,419,836 cycles # 2.696 GHz - 12,896,174,142 instructions # 2.15 insn per cycle - 2.226315650 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) + 7,597,122,223 cycles:u # 3.464 GHz (74.83%) + 2,836,861 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.94%) + 4,216,614,115 stalled-cycles-backend:u # 55.50% backend cycles idle (75.12%) + 12,778,244,268 instructions:u # 1.68 insn per cycle + # 0.33 stalled cycles per insn (75.20%) + 2.196413871 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:45663) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 +Avg ME (F77/C++) = 6.6266731198156778E-004 +Relative difference = 2.837296716733571e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.743711e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.762861e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.762861e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.885038 sec -INFO: No Floating Point Exceptions have been reported - 5,086,798,971 cycles # 2.694 GHz - 11,447,968,989 instructions # 2.25 insn per cycle - 1.889284279 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.141072e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.153903e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.153903e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.306249 sec -INFO: No Floating Point Exceptions have been reported - 3,947,559,408 cycles # 1.709 GHz - 
5,896,754,674 instructions # 1.49 insn per cycle - 2.310527958 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 15bbe59069..988b54655f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:40:50 +DATE: 2024-05-16_16:00:11 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.326887e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.370559e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.376371e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.487782 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.455591e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.730172e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.730916e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535870e-02 
+- 4.279978e-02 ) GeV^-4 +TOTAL : 0.461829 sec INFO: No Floating Point Exceptions have been reported - 1,984,813,926 cycles # 2.807 GHz - 2,933,686,219 instructions # 1.48 insn per cycle - 0.764328783 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,181,103,355 cycles:u # 2.678 GHz (72.77%) + 2,044,415 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.83%) + 5,372,072 stalled-cycles-backend:u # 0.45% backend cycles idle (75.49%) + 1,609,274,886 instructions:u # 1.36 insn per cycle + # 0.00 stalled cycles per insn (74.63%) + 0.543216903 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.584549e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.644337e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.647136e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.720884 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.698964e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.725804e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.726247e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 +TOTAL : 2.611783 sec INFO: No Floating Point Exceptions have been reported - 5,560,374,951 cycles # 2.849 GHz - 11,900,809,748 instructions # 2.14 insn per cycle - 2.008088048 seconds time elapsed + 8,754,892,668 cycles:u # 3.324 GHz (75.10%) + 2,394,917 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.04%) + 4,639,328 stalled-cycles-backend:u # 0.05% backend cycles idle (75.01%) + 7,446,007,909 instructions:u # 0.85 insn per cycle + # 0.00 stalled cycles per insn (74.91%) + 2.657197180 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626791e-04 +Avg ME (F77/GPU) = 6.6270899361878938E-004 +Relative difference = 4.511024836808726e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.909633e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.910547e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910547e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.596251 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.469441e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.470531e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.470531e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.651593 sec INFO: No Floating Point Exceptions have been reported - 24,624,004,022 cycles # 2.864 GHz - 78,129,381,217 instructions # 3.17 insn per cycle - 8.600293639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 23,317,370,497 cycles:u # 3.504 GHz (74.98%) + 948,384 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.00%) + 3,079,063,251 stalled-cycles-backend:u # 13.21% backend cycles idle (75.00%) + 75,776,373,603 instructions:u # 3.25 insn per cycle + # 0.04 stalled cycles per insn (75.00%) + 6.660009095 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863266294753E-004 -Relative difference = 4.92840687132121e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274866115424713E-004 +Relative difference = 5.861309557415831e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.891953e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.904635e+03 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.904635e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.387612 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.913769e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.931483e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.931483e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.664463 sec INFO: No Floating Point Exceptions have been reported - 6,469,659,104 cycles # 2.706 GHz - 20,120,611,338 instructions # 3.11 insn per cycle - 2.391816623 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) + 5,806,934,221 cycles:u # 3.482 GHz (75.06%) + 546,050 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.06%) + 816,845,040 stalled-cycles-backend:u # 14.07% backend cycles idle (75.06%) + 20,041,870,445 instructions:u # 3.45 insn per cycle + # 0.04 stalled cycles per insn (75.06%) + 1.670978010 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861460025036E-004 -Relative difference = 2.2029847170826283e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274845946848876E-004 +Relative difference = 6.115670001294808e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.562010e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.568248e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.568248e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.059004 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.367496e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.377823e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.377823e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.702661 sec INFO: No Floating Point Exceptions have been reported - 2,818,181,262 cycles # 2.654 GHz - 6,988,460,270 instructions # 2.48 insn per cycle - 1.063195979 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) + 2,448,534,363 cycles:u # 3.468 GHz (75.07%) + 202,280 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.08%) + 246,454,849 stalled-cycles-backend:u # 10.07% backend cycles idle (75.08%) + 6,977,330,815 instructions:u # 2.85 insn per cycle + # 0.04 stalled cycles per insn (75.08%) + 0.708851823 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174574524E-004 -Relative difference = 2.7544470208782633e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271947045332125E-004 +Relative difference = 4.4583988847766445e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.763183e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.771185e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.771185e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.938662 sec -INFO: No Floating Point Exceptions have been reported - 2,488,393,509 cycles # 2.641 GHz - 6,295,244,635 instructions # 2.53 insn per cycle - 0.942828770 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174574524E-004 -Relative difference = 2.7544470208782633e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.363218e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.368048e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.368048e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.211358 sec -INFO: No Floating Point Exceptions have been reported - 2,044,658,355 
cycles # 1.683 GHz - 3,265,998,063 instructions # 1.60 insn per cycle - 1.215542758 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779373838E-004 -Relative difference = 4.193891735414155e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index e281ad389f..031177f82a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,250 +1,204 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:03:26 +DATE: 2024-05-16_16:28:47 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.615502e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.322427e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.322427e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.477115 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.553549e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.723413e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.723413e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.202335e-01 +- 3.251521e-01 ) GeV^-4 +TOTAL : 0.448264 sec INFO: No Floating Point Exceptions have been reported - 1,936,349,619 cycles # 2.809 GHz - 2,877,179,431 instructions # 1.49 insn per cycle - 0.747561501 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,251,529,402 cycles:u # 2.699 GHz (75.20%) + 2,893,169 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.15%) + 33,761,836 stalled-cycles-backend:u # 2.70% backend cycles idle (74.29%) + 1,611,617,832 instructions:u # 1.29 insn per cycle + # 0.02 stalled cycles per insn (75.70%) + 0.494487066 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.243623e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.556013e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.556013e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.907816 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.265840e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.707070e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.707070e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.213799e+02 +- 1.195366e+02 ) GeV^-4 +TOTAL : 3.437893 sec INFO: No Floating Point Exceptions have been reported - 6,131,638,198 cycles # 2.845 GHz - 12,981,768,605 instructions # 2.12 insn per cycle - 2.213144159 seconds time elapsed + 11,545,486,584 cycles:u # 3.334 GHz (74.94%) + 21,685,421 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.85%) + 1,139,000,542 stalled-cycles-backend:u # 9.87% backend cycles idle (74.84%) + 9,911,182,338 instructions:u # 0.86 insn per cycle + # 0.11 stalled cycles per insn (75.10%) + 3.489454557 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626791e-04 +Avg ME (F77/GPU) = 6.6270899361878938E-004 +Relative difference = 4.511024836808726e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.909165e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.910120e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910120e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.600727 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.456445e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.457487e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.457487e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.688589 sec INFO: No Floating Point Exceptions have been reported - 24,637,778,479 cycles # 2.864 GHz - 78,132,610,249 instructions # 3.17 insn per cycle - 8.604942209 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 23,329,106,189 cycles:u # 3.486 GHz (74.99%) + 964,423 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.02%) + 3,034,499,541 stalled-cycles-backend:u # 13.01% backend cycles idle (75.02%) + 75,768,705,801 instructions:u # 3.25 insn per cycle + # 0.04 stalled cycles per insn (75.02%) + 6.696220927 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863266294753E-004 -Relative difference = 4.92840687132121e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274866115424713E-004 +Relative difference = 5.861309557415831e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.457452e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.468775e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.468775e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.550311 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.914382e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.931837e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.931837e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.666359 sec INFO: No Floating Point Exceptions have been reported - 6,935,275,139 cycles # 2.716 GHz - 20,130,100,658 instructions # 2.90 insn per cycle - 2.554710358 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) + 5,845,631,490 cycles:u # 3.501 GHz (74.83%) + 379,736 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.04%) + 808,475,687 stalled-cycles-backend:u # 13.83% backend cycles idle (75.09%) + 20,043,502,475 instructions:u # 3.43 insn per cycle + # 0.04 stalled cycles per insn (75.09%) + 1.672663397 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861460025036E-004 -Relative difference = 2.2029847170826283e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274845946848876E-004 +Relative difference = 6.115670001294808e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.550561e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.557160e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.557160e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.068476 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.357311e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.367460e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.367460e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.707778 sec INFO: No Floating Point Exceptions have been reported - 2,830,711,742 cycles # 2.640 GHz - 6,997,830,070 instructions # 2.47 insn per cycle - 1.072903816 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) + 2,463,882,722 cycles:u # 3.463 GHz (74.95%) + 628,681 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.26%) + 248,780,129 stalled-cycles-backend:u # 10.10% backend cycles idle (75.26%) + 6,979,514,050 instructions:u # 2.83 insn per cycle + # 0.04 stalled cycles per insn (75.26%) + 0.714446892 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174574524E-004 -Relative difference = 2.7544470208782633e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271947045332125E-004 +Relative difference = 4.4583988847766445e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.772339e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.780808e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.780808e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.936387 sec -INFO: No Floating Point Exceptions have been reported - 2,497,824,247 cycles # 2.658 GHz - 6,305,168,616 instructions # 2.52 insn per cycle - 0.940674173 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174574524E-004 -Relative difference = 2.7544470208782633e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.362852e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.367803e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.367803e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.214531 sec -INFO: No Floating Point Exceptions have been reported - 2,054,265,568 cycles # 1.686 GHz - 3,276,400,100 instructions # 1.59 insn per cycle - 1.218830996 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779373838E-004 -Relative difference = 4.193891735414155e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 1c3846a692..b462198372 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:13:15 +DATE: 2024-05-16_16:35:13 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.362325e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.415082e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.420724e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.471632 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.481260e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.715421e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.716098e+05 ) sec^-1 +MeanMatrixElemValue 
= ( 4.202247e-01 +- 3.251485e-01 ) GeV^-4 +TOTAL : 0.438030 sec INFO: No Floating Point Exceptions have been reported - 1,968,514,245 cycles # 2.841 GHz - 2,902,581,432 instructions # 1.47 insn per cycle - 0.750206216 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,220,333,101 cycles:u # 2.666 GHz (75.54%) + 2,304,041 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.67%) + 45,270,810 stalled-cycles-backend:u # 3.71% backend cycles idle (74.67%) + 1,585,479,906 instructions:u # 1.30 insn per cycle + # 0.03 stalled cycles per insn (74.91%) + 0.479059290 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.620229e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.693332e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.696842e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.810763 sec 
+EvtsPerSec[Rmb+ME] (23) = ( 4.684395e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.723876e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.724324e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.213664e+02 +- 1.195366e+02 ) GeV^-4 +TOTAL : 3.308698 sec INFO: No Floating Point Exceptions have been reported - 5,856,364,996 cycles # 2.870 GHz - 12,360,478,892 instructions # 2.11 insn per cycle - 2.100167053 seconds time elapsed + 11,006,758,594 cycles:u # 3.302 GHz (74.97%) + 10,951,906 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.13%) + 1,139,355,084 stalled-cycles-backend:u # 10.35% backend cycles idle (75.08%) + 8,915,439,098 instructions:u # 0.81 insn per cycle + # 0.13 stalled cycles per insn (75.01%) + 3.359798684 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626791e-04 +Avg ME (F77/GPU) = 6.6270899361878938E-004 +Relative difference = 4.511024836808726e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.921942e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.922884e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.922884e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.542686 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.456233e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.457279e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.457279e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.687238 sec INFO: No Floating Point Exceptions have been reported - 24,622,493,732 cycles # 2.881 GHz - 78,127,963,456 instructions # 3.17 insn per cycle - 8.546707601 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 23,293,795,292 cycles:u # 3.482 GHz (75.01%) + 1,274,168 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 3,029,191,292 stalled-cycles-backend:u # 13.00% backend cycles idle (75.01%) + 75,830,117,598 instructions:u # 3.26 insn per cycle + # 0.04 stalled cycles per insn (75.01%) + 6.696268134 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863266294753E-004 -Relative difference = 4.92840687132121e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274866115424713E-004 +Relative difference = 5.861309557415831e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.925135e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.937766e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.937766e+03 ) sec^-1 
-MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.378023 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.892092e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.909443e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.909443e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.667991 sec INFO: No Floating Point Exceptions have been reported - 6,481,719,151 cycles # 2.722 GHz - 20,120,720,773 instructions # 3.10 insn per cycle - 2.382079719 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) + 5,842,552,310 cycles:u # 3.497 GHz (74.86%) + 397,694 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.09%) + 807,766,317 stalled-cycles-backend:u # 13.83% backend cycles idle (75.10%) + 20,042,846,114 instructions:u # 3.43 insn per cycle + # 0.04 stalled cycles per insn (75.10%) + 1.675653045 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861460025036E-004 -Relative difference = 2.2029847170826283e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274845946848876E-004 +Relative difference = 6.115670001294808e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.581338e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.587855e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.587855e+04 ) sec^-1 
-MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 1.046370 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.275768e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.285251e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.285251e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.730637 sec INFO: No Floating Point Exceptions have been reported - 2,822,358,408 cycles # 2.688 GHz - 6,985,542,199 instructions # 2.48 insn per cycle - 1.050425346 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) + 2,459,552,259 cycles:u # 3.354 GHz (74.93%) + 740,121 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.92%) + 248,043,808 stalled-cycles-backend:u # 10.08% backend cycles idle (74.91%) + 6,975,163,769 instructions:u # 2.84 insn per cycle + # 0.04 stalled cycles per insn (74.91%) + 0.734545689 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174574524E-004 -Relative difference = 2.7544470208782633e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271947045332125E-004 +Relative difference = 4.4583988847766445e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.806198e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.814674e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.814674e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.917862 sec -INFO: No Floating Point Exceptions have been reported - 2,496,110,223 cycles # 2.709 GHz - 6,293,657,033 instructions # 2.52 insn per cycle - 
0.921934399 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174574524E-004 -Relative difference = 2.7544470208782633e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.393764e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.398765e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.398765e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.186576 sec -INFO: No Floating Point Exceptions have been 
reported - 2,050,577,153 cycles # 1.723 GHz - 3,264,219,053 instructions # 1.59 insn per cycle - 1.190613213 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779373838E-004 -Relative difference = 4.193891735414155e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index dc12ca7aae..582f69d8fa 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,236 +1,195 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand 
-Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:07:45 +DATE: 2024-05-16_16:33:03 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.747793e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.405382e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.411341e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.473580 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.570878e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.733414e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.734046e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.202335e-01 +- 3.251521e-01 ) GeV^-4 +TOTAL : 0.453497 sec INFO: No Floating Point Exceptions have been reported - 1,929,031,590 cycles # 2.811 GHz - 2,902,080,173 instructions # 1.50 insn per cycle - 0.744461149 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,268,447,146 cycles:u # 2.672 GHz (72.97%) + 2,815,308 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.71%) + 33,146,365 stalled-cycles-backend:u # 2.61% backend cycles idle (75.54%) + 1,688,684,066 instructions:u # 1.33 insn per cycle + # 0.02 stalled cycles per insn (74.71%) + 0.497506354 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.464876e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.690964e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.694375e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.841417 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.286069e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.718768e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.719203e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.213799e+02 +- 1.195366e+02 ) GeV^-4 +TOTAL : 3.413932 sec INFO: No Floating Point Exceptions have been reported - 5,892,322,421 cycles # 2.846 GHz - 12,206,550,799 instructions # 2.07 insn per cycle - 2.128532659 seconds time elapsed + 11,242,108,879 cycles:u # 3.266 GHz (75.00%) + 22,119,983 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.97%) + 1,144,437,843 stalled-cycles-backend:u # 10.18% backend cycles idle (74.91%) + 9,731,759,832 instructions:u # 0.87 insn per cycle + # 0.12 stalled cycles per insn (74.91%) + 3.461960927 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626791e-04 +Avg ME (F77/GPU) = 6.6270899361878938E-004 +Relative difference = 4.511024836808726e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.911509e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.912427e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.912427e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.588000 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.387999e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.389049e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.389049e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.877878 sec INFO: No Floating Point Exceptions have been reported - 24,603,486,303 cycles # 2.864 GHz - 78,128,844,221 instructions # 3.18 insn per cycle - 8.592028071 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) + 23,305,784,912 cycles:u # 3.387 GHz (75.01%) + 923,326 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.00%) + 3,093,870,225 stalled-cycles-backend:u # 13.28% backend cycles idle (75.00%) + 75,741,235,779 instructions:u # 3.25 insn per cycle + # 0.04 stalled cycles per insn (75.00%) + 6.882197021 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863266294753E-004 -Relative difference = 4.92840687132121e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274866115424713E-004 +Relative difference = 5.861309557415831e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.897521e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.909886e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.909886e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.385730 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.795421e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.812524e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.812524e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.684375 sec INFO: No Floating Point Exceptions have been reported - 6,477,077,766 cycles # 2.711 GHz - 20,121,628,941 instructions # 3.11 insn per cycle - 2.389950461 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) + 5,831,244,487 cycles:u # 3.457 GHz (74.88%) + 386,659 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.87%) + 816,144,338 stalled-cycles-backend:u # 14.00% backend cycles idle (74.87%) + 20,097,880,072 instructions:u # 3.45 insn per cycle + # 0.04 stalled cycles per insn (74.87%) + 1.689594333 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861460025036E-004 -Relative difference = 2.2029847170826283e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274845946848876E-004 +Relative difference = 6.115670001294808e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.564279e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.570570e+04 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.570570e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.056604 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.355939e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.365846e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.365846e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.706033 sec INFO: No Floating Point Exceptions have been reported - 2,817,814,854 cycles # 2.658 GHz - 6,988,003,654 instructions # 2.48 insn per cycle - 1.060745031 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) + 2,450,973,730 cycles:u # 3.457 GHz (74.81%) + 233,106 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.17%) + 246,000,590 stalled-cycles-backend:u # 10.04% backend cycles idle (75.17%) + 6,983,784,502 instructions:u # 2.85 insn per cycle + # 0.04 stalled cycles per insn (75.17%) + 0.710121922 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174574524E-004 -Relative difference = 2.7544470208782633e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271947045332125E-004 +Relative difference = 4.4583988847766445e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.769248e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.777272e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.777272e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.935150 sec -INFO: No Floating Point Exceptions have been reported - 2,489,664,656 cycles # 2.652 GHz - 6,295,373,565 instructions # 2.53 insn per cycle - 
0.939255376 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174574524E-004 -Relative difference = 2.7544470208782633e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.359069e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.363945e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.363945e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.214973 sec -INFO: No Floating Point Exceptions have been 
reported - 2,049,104,437 cycles # 1.682 GHz - 3,266,431,248 instructions # 1.59 insn per cycle - 1.219018056 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779373838E-004 -Relative difference = 4.193891735414155e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index fb9b3d5f50..825db12b95 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:41:16 +DATE: 2024-05-16_16:00:29 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.355693e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.401036e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.406994e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.484635 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.526350e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.732794e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.733472e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535870e-02 
+- 4.279978e-02 ) GeV^-4 +TOTAL : 0.456858 sec INFO: No Floating Point Exceptions have been reported - 1,972,569,534 cycles # 2.816 GHz - 2,939,499,932 instructions # 1.49 insn per cycle - 0.757402101 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,172,997,975 cycles:u # 2.662 GHz (74.52%) + 2,102,170 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.94%) + 5,426,077 stalled-cycles-backend:u # 0.46% backend cycles idle (74.58%) + 1,608,142,364 instructions:u # 1.37 insn per cycle + # 0.00 stalled cycles per insn (74.86%) + 0.502460179 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.619113e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.679355e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.682149e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.713739 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.703353e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.735893e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.736329e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 +TOTAL : 2.625423 sec INFO: No Floating Point Exceptions have been reported - 5,540,767,327 cycles # 2.848 GHz - 11,699,037,597 instructions # 2.11 insn per cycle - 2.001424634 seconds time elapsed + 8,797,238,441 cycles:u # 3.342 GHz (74.92%) + 2,466,417 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.05%) + 5,893,940 stalled-cycles-backend:u # 0.07% backend cycles idle (75.08%) + 7,429,542,596 instructions:u # 0.84 insn per cycle + # 0.00 stalled cycles per insn (75.08%) + 2.674171484 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626791e-04 +Avg ME (F77/GPU) = 6.6270899361878938E-004 +Relative difference = 4.511024836808726e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.914726e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.915647e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.915647e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.573445 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.468765e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.469812e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.469812e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.653197 sec INFO: No Floating Point Exceptions have been reported - 24,558,709,341 cycles # 2.863 GHz - 77,854,833,330 instructions # 3.17 insn per cycle - 8.577561930 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3114) (avx2: 0) (512y: 0) (512z: 0) + 23,282,870,099 cycles:u # 3.498 GHz (75.00%) + 939,267 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.00%) + 2,781,747,290 stalled-cycles-backend:u # 11.95% backend cycles idle (75.00%) + 75,716,409,389 instructions:u # 3.25 insn per cycle + # 0.04 stalled cycles per insn (75.00%) + 6.660070082 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3848) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866268634797E-004 -Relative difference = 5.630135835748959e-08 +Avg ME (F77/C++) = 6.6274866108667618E-004 +Relative difference = 5.871505118544242e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.975982e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.989277e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.989277e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.359034 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.965818e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.983871e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.983871e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.655561 sec INFO: No Floating Point Exceptions have been reported - 6,427,044,365 cycles # 2.721 GHz - 20,086,102,386 instructions # 3.13 insn per cycle - 2.363343503 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) + 5,795,882,258 cycles:u # 3.493 GHz (74.96%) + 386,455 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.93%) + 950,491,218 stalled-cycles-backend:u # 16.40% backend cycles idle (74.93%) + 20,037,204,067 instructions:u # 3.46 insn per cycle + # 0.05 stalled cycles per insn (74.93%) + 1.662161951 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13231) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861465384638E-004 -Relative difference = 2.211071647257023e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274845946848876E-004 +Relative difference = 6.115670001294808e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.504468e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.510257e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.510257e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.097904 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.377610e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.387963e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.387963e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.699677 sec INFO: No Floating Point Exceptions have been reported - 2,922,790,348 cycles # 2.654 GHz - 7,129,934,034 instructions # 2.44 insn per cycle - 1.101954791 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) + 2,444,743,906 cycles:u # 3.477 GHz (75.06%) + 624,230 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.97%) + 293,905,462 stalled-cycles-backend:u # 12.02% backend cycles idle (74.97%) + 6,976,866,477 instructions:u # 2.85 insn per cycle + # 0.04 stalled cycles per insn (74.97%) + 0.705849957 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11587) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271939668077068E-004 -Relative difference = 5.008498817890231e-09 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271947045332125E-004 +Relative difference = 4.4583988847766445e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.699271e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.706623e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.706623e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.973114 sec -INFO: No Floating Point Exceptions have been reported - 2,595,556,237 cycles # 2.658 GHz - 6,438,662,691 instructions # 2.48 insn per cycle - 0.977341866 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271939668077068E-004 -Relative difference = 5.008498817890231e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.316379e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.321013e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.321013e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.254012 sec -INFO: No Floating Point Exceptions have been reported - 2,116,081,195 
cycles # 1.683 GHz - 3,427,806,501 instructions # 1.62 insn per cycle - 1.258282002 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2924) (512y: 22) (512z: 9654) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952032322112E-004 -Relative difference = 3.066639970473621e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 3f8f67a608..c1ed143442 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:55:36 +DATE: 2024-05-16_16:18:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.546626e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.586411e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.590653e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.493388 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.920897e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.740666e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.741368e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535870e-02 
+- 4.279978e-02 ) GeV^-4 +TOTAL : 1.282174 sec INFO: No Floating Point Exceptions have been reported - 2,052,942,224 cycles # 2.851 GHz - 3,071,897,705 instructions # 1.50 insn per cycle - 0.778091403 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,167,038,613 cycles:u # 2.658 GHz (74.00%) + 2,071,328 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.53%) + 5,431,611 stalled-cycles-backend:u # 0.47% backend cycles idle (74.88%) + 1,540,476,429 instructions:u # 1.32 insn per cycle + # 0.00 stalled cycles per insn (76.40%) + 1.324941068 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.711232e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.769137e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.771784e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.862053 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.688286e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.717798e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.718232e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 +TOTAL : 2.613969 sec INFO: No Floating Point Exceptions have been reported - 6,039,204,312 cycles # 2.872 GHz - 11,937,016,347 instructions # 1.98 insn per cycle - 2.158356809 seconds time elapsed + 8,765,727,418 cycles:u # 3.327 GHz (74.62%) + 2,501,390 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.81%) + 5,540,277 stalled-cycles-backend:u # 0.06% backend cycles idle (75.12%) + 7,358,326,767 instructions:u # 0.84 insn per cycle + # 0.00 stalled cycles per insn (75.25%) + 2.663247898 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262660579844562E-004 -Relative difference = 2.836238137986709e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626791e-04 +Avg ME (F77/GPU) = 6.6270899361878938E-004 +Relative difference = 4.511024836808726e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.454006e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.454774e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.454774e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 30.077717 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.124957e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.125610e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.125610e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.204931e-01 +- 3.252405e-01 ) GeV^-4 +TOTAL : 26.786242 sec INFO: No Floating Point Exceptions have been reported - 86,228,096,895 cycles # 2.867 GHz - 135,581,749,205 instructions # 1.57 insn per cycle - 30.081848617 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15593) (avx2: 0) (512y: 0) (512z: 0) + 93,343,664,723 cycles:u # 3.485 GHz (74.99%) + 553,820,580 stalled-cycles-frontend:u # 0.59% frontend cycles idle (75.01%) + 6,589,856,337 stalled-cycles-backend:u # 7.06% backend cycles idle (75.01%) + 133,679,871,960 instructions:u # 1.43 insn per cycle + # 0.05 stalled cycles per insn (75.01%) + 26.793077770 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:16437) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275351196781740E-004 -Relative difference = 1.805772034719401e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627534e-04 +Avg ME (F77/C++) = 6.6275342244977858E-004 +Relative difference = 3.387350194093721e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.767198e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.779100e+03 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.779100e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.432356 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 8.092589e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.104600e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.104600e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 +TOTAL : 2.037363 sec INFO: No Floating Point Exceptions have been reported - 6,776,462,064 cycles # 2.783 GHz - 19,386,992,522 instructions # 2.86 insn per cycle - 2.436630257 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69681) (avx2: 0) (512y: 0) (512z: 0) + 6,881,540,006 cycles:u # 3.372 GHz (74.96%) + 335,598 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.92%) + 3,042,174,457 stalled-cycles-backend:u # 44.21% backend cycles idle (74.92%) + 19,129,508,750 instructions:u # 2.78 insn per cycle + # 0.16 stalled cycles per insn (74.91%) + 2.044606153 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68898) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274862707273868E-004 -Relative difference = 4.0849182767952624e-08 +Avg ME (F77/C++) = 6.6274857053714997E-004 +Relative difference = 4.445554471174176e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.415254e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.420302e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.420302e+04 
) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.167177 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.452271e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.456048e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.456048e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 1.139550 sec INFO: No Floating Point Exceptions have been reported - 3,174,327,264 cycles # 2.711 GHz - 6,807,988,001 instructions # 2.14 insn per cycle - 1.171487938 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) + 3,964,798,022 cycles:u # 3.468 GHz (74.83%) + 1,510,279 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.81%) + 2,247,951,197 stalled-cycles-backend:u # 56.70% backend cycles idle (74.81%) + 6,727,058,780 instructions:u # 1.70 insn per cycle + # 0.33 stalled cycles per insn (74.81%) + 1.147461385 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:48625) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731558747466E-004 -Relative difference = 2.3520194007978538e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627274e-04 +Avg ME (F77/C++) = 6.6272735722101156E-004 +Relative difference = 6.454990161554483e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.702865e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.710950e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.710950e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.971352 sec -INFO: No Floating Point Exceptions have been reported - 2,641,359,018 cycles # 2.709 GHz - 5,985,956,533 instructions # 2.27 insn per cycle - 0.975633569 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731558747466E-004 -Relative difference = 2.3520194007978538e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.382138e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.387143e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.387143e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.195051 sec -INFO: No Floating Point Exceptions have been reported - 2,079,765,601 
cycles # 1.735 GHz - 3,501,460,071 instructions # 1.68 insn per cycle - 1.199295448 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5210) (512y: 3) (512z:44829) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627275e-04 -Avg ME (F77/C++) = 6.6272750363879224E-004 -Relative difference = 5.490631193034436e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index f651d28060..7c8363fc8c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:56:25 +DATE: 2024-05-16_16:18:45 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.511551e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.549792e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.554590e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.495451 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.335496e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.736793e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.737597e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535870e-02 
+- 4.279978e-02 ) GeV^-4 +TOTAL : 0.803562 sec INFO: No Floating Point Exceptions have been reported - 2,081,587,427 cycles # 2.833 GHz - 3,058,350,902 instructions # 1.47 insn per cycle - 0.791962629 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,181,097,322 cycles:u # 2.657 GHz (74.42%) + 2,196,405 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.84%) + 5,335,814 stalled-cycles-backend:u # 0.45% backend cycles idle (74.97%) + 1,533,683,700 instructions:u # 1.30 insn per cycle + # 0.00 stalled cycles per insn (76.38%) + 0.850514793 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.609535e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.664835e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.667390e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.882393 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.422474e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.737257e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.737702e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 +TOTAL : 3.381105 sec INFO: No Floating Point Exceptions have been reported - 6,108,387,546 cycles # 2.880 GHz - 12,340,826,531 instructions # 2.02 insn per cycle - 2.177538628 seconds time elapsed + 8,707,695,461 cycles:u # 3.299 GHz (74.76%) + 2,469,193 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.93%) + 5,175,074 stalled-cycles-backend:u # 0.06% backend cycles idle (75.12%) + 7,338,896,918 instructions:u # 0.84 insn per cycle + # 0.00 stalled cycles per insn (75.19%) + 3.430759133 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262660579844562E-004 -Relative difference = 2.836238137986709e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626791e-04 +Avg ME (F77/GPU) = 6.6270899361878938E-004 +Relative difference = 4.511024836808726e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.501753e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.502516e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.502516e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 29.816879 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.231441e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.232111e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.232111e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.204931e-01 +- 3.252404e-01 ) GeV^-4 +TOTAL : 26.329235 sec INFO: No Floating Point Exceptions have been reported - 86,332,810,324 cycles # 2.895 GHz - 136,005,056,328 instructions # 1.58 insn per cycle - 29.820915946 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15571) (avx2: 0) (512y: 0) (512z: 0) + 90,935,247,567 cycles:u # 3.454 GHz (75.00%) + 214,862,709 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.00%) + 6,938,555,145 stalled-cycles-backend:u # 7.63% backend cycles idle (75.00%) + 134,149,203,954 instructions:u # 1.48 insn per cycle + # 0.05 stalled cycles per insn (75.00%) + 26.335826468 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:16752) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275348988418387E-004 -Relative difference = 1.5263316105958472e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627534e-04 +Avg ME (F77/C++) = 6.6275342811702997E-004 +Relative difference = 4.242457295829522e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.665500e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.677672e+03 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.677672e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.468426 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 8.209079e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.221181e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.221181e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 +TOTAL : 2.007981 sec INFO: No Floating Point Exceptions have been reported - 6,845,942,231 cycles # 2.769 GHz - 19,438,050,467 instructions # 2.84 insn per cycle - 2.472841886 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69723) (avx2: 0) (512y: 0) (512z: 0) + 6,831,424,486 cycles:u # 3.396 GHz (74.97%) + 340,501 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.95%) + 3,243,827,993 stalled-cycles-backend:u # 47.48% backend cycles idle (74.95%) + 19,147,748,926 instructions:u # 2.80 insn per cycle + # 0.17 stalled cycles per insn (74.95%) + 2.014470522 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68882) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274862764021530E-004 -Relative difference = 4.170542995014107e-08 +Avg ME (F77/C++) = 6.6274857044990032E-004 +Relative difference = 4.4587192899226015e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.417376e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.422501e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.422501e+04 
) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.165327 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.500161e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.504209e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.504209e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 1.103309 sec INFO: No Floating Point Exceptions have been reported - 3,124,352,057 cycles # 2.673 GHz - 6,718,803,660 instructions # 2.15 insn per cycle - 1.169556736 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) + 3,841,380,021 cycles:u # 3.470 GHz (74.79%) + 743,882 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.71%) + 2,188,767,593 stalled-cycles-backend:u # 56.98% backend cycles idle (74.71%) + 6,678,743,261 instructions:u # 1.74 insn per cycle + # 0.33 stalled cycles per insn (74.91%) + 1.114464641 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47416) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731651051409E-004 -Relative difference = 2.4912983202981302e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627274e-04 +Avg ME (F77/C++) = 6.6272735755491807E-004 +Relative difference = 6.404606472340801e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.691458e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.698947e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.698947e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.977716 sec -INFO: No Floating Point Exceptions have been reported - 2,637,169,918 cycles # 2.688 GHz - 5,969,286,098 instructions # 2.26 insn per cycle - 0.981942660 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731651051409E-004 -Relative difference = 2.4912983202981302e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.359514e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.364207e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.364207e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.214640 sec -INFO: No Floating Point Exceptions have been reported - 2,077,190,375 
cycles # 1.705 GHz - 3,494,266,618 instructions # 1.68 insn per cycle - 1.219012886 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4174) (512y: 4) (512z:44472) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627275e-04 -Avg ME (F77/C++) = 6.6272750384530066E-004 -Relative difference = 5.80223501432476e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 19b36f52e3..9d0d7beb2c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:41:41 +DATE: 2024-05-16_16:00:47 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.461685e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.485212e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.487571e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.529512 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.267158e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.440515e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.441290e+04 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 
+- 2.047289e-02 ) GeV^-4 +TOTAL : 0.640624 sec INFO: No Floating Point Exceptions have been reported - 2,178,837,447 cycles # 2.822 GHz - 3,364,663,947 instructions # 1.54 insn per cycle - 0.830788537 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,970,993,277 cycles:u # 2.996 GHz (73.71%) + 2,234,518 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.07%) + 5,770,515 stalled-cycles-backend:u # 0.29% backend cycles idle (75.40%) + 2,169,407,546 instructions:u # 1.10 insn per cycle + # 0.00 stalled cycles per insn (75.69%) + 0.686561211 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.126264e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.153475e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.154649e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.029127 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.241178e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.246579e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.246636e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 7.701668 sec INFO: No Floating Point Exceptions have been reported - 9,370,132,482 cycles # 2.854 GHz - 19,961,685,193 instructions # 2.13 insn per cycle - 3.339329204 seconds time elapsed + 26,494,625,312 cycles:u # 3.436 GHz (75.00%) + 3,015,768 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.03%) + 5,017,372 stalled-cycles-backend:u # 0.02% backend cycles idle (74.96%) + 21,118,215,281 instructions:u # 0.80 insn per cycle + # 0.00 stalled cycles per insn (74.96%) + 7.755036739 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.814801e+03 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.815638e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.815638e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.046315 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.184089e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.184941e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.184941e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 7.520886 sec INFO: No Floating Point Exceptions have been reported - 25,898,843,827 cycles # 2.862 GHz - 79,438,691,532 instructions # 3.07 insn per cycle - 9.050548799 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4858) (avx2: 0) (512y: 0) (512z: 0) + 26,339,863,085 cycles:u # 3.501 GHz (75.02%) + 27,556,319 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.02%) + 3,725,587,826 stalled-cycles-backend:u # 14.14% backend cycles idle (75.02%) + 82,342,494,882 instructions:u # 3.13 insn per cycle + # 0.05 stalled cycles per insn (75.02%) + 7.528178258 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 6623) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.427576e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.430583e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.430583e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
4.794109 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 5.104365e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.109013e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.109013e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.224895 sec INFO: No Floating Point Exceptions have been reported - 12,707,110,349 cycles # 2.649 GHz - 38,549,995,901 instructions # 3.03 insn per cycle - 4.798432518 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13163) (avx2: 0) (512y: 0) (512z: 0) + 11,294,876,919 cycles:u # 3.499 GHz (75.01%) + 3,584,548 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.97%) + 1,420,624,378 stalled-cycles-backend:u # 12.58% backend cycles idle (74.97%) + 38,448,878,910 instructions:u # 3.40 insn per cycle + # 0.04 stalled cycles per insn (74.97%) + 3.231777532 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12755) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.947160e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.962973e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.962973e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.073357 
sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.221185e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.223860e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.223860e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.354741 sec INFO: No Floating Point Exceptions have been reported - 5,517,673,615 cycles # 2.658 GHz - 13,479,814,632 instructions # 2.44 insn per cycle - 2.077628129 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) + 4,740,411,287 cycles:u # 3.490 GHz (74.68%) + 293,447 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.83%) + 408,157,629 stalled-cycles-backend:u # 8.61% backend cycles idle (75.13%) + 13,512,450,760 instructions:u # 2.85 insn per cycle + # 0.03 stalled cycles per insn (75.27%) + 1.362546494 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10944) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 +Avg ME (F77/C++) = 6.6266730409276836E-004 +Relative difference = 2.9563428359824236e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.092853e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.113453e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.113453e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.813039 sec -INFO: No Floating Point Exceptions have been reported - 4,828,852,439 cycles # 2.658 GHz - 12,135,084,334 instructions # 2.51 insn per cycle - 1.817332368 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.671146e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.682190e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.682190e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.468744 sec -INFO: No Floating Point Exceptions have been reported - 4,141,507,976 cycles # 1.676 GHz - 6,337,241,929 
instructions # 1.53 insn per cycle - 2.472886901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1803) (512y: 93) (512z: 9358) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index 10c707e81e..c299a88ec3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_14:42:15 +DATE: 2024-05-16_16:01:14 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.482619e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.506518e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.508744e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.530792 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.233929e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.442774e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.443041e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 
+- 2.047289e-02 ) GeV^-4 +TOTAL : 0.549444 sec INFO: No Floating Point Exceptions have been reported - 2,175,803,522 cycles # 2.817 GHz - 3,378,965,043 instructions # 1.55 insn per cycle - 0.832396723 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,487,157,955 cycles:u # 2.746 GHz (75.03%) + 2,201,231 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.76%) + 5,308,721 stalled-cycles-backend:u # 0.36% backend cycles idle (74.67%) + 1,826,761,476 instructions:u # 1.23 insn per cycle + # 0.00 stalled cycles per insn (74.94%) + 0.610510696 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.148343e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.175835e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.177034e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.021680 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.732620e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.741273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.741383e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.357089 sec INFO: No Floating Point Exceptions have been reported - 9,373,127,330 cycles # 2.855 GHz - 21,008,547,067 instructions # 2.24 insn per cycle - 3.339100414 seconds time elapsed + 21,757,949,392 cycles:u # 3.414 GHz (75.05%) + 2,871,375 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.11%) + 5,545,383 stalled-cycles-backend:u # 0.03% backend cycles idle (75.08%) + 17,476,889,330 instructions:u # 0.80 insn per cycle + # 0.00 stalled cycles per insn (75.02%) + 6.410991416 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.816314e+03 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.817146e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.817146e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.038908 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.214949e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.215824e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.215824e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 7.416114 sec INFO: No Floating Point Exceptions have been reported - 25,883,565,851 cycles # 2.863 GHz - 79,454,182,113 instructions # 3.07 insn per cycle - 9.043041112 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4505) (avx2: 0) (512y: 0) (512z: 0) + 25,997,090,548 cycles:u # 3.504 GHz (75.00%) + 2,505,020 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) + 3,347,857,668 stalled-cycles-backend:u # 12.88% backend cycles idle (74.99%) + 82,247,523,819 instructions:u # 3.16 insn per cycle + # 0.04 stalled cycles per insn (74.99%) + 7.423065008 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 6491) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.444359e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.447378e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.447378e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
4.770841 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 5.110095e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.114739e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.114739e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.221131 sec INFO: No Floating Point Exceptions have been reported - 12,673,930,469 cycles # 2.656 GHz - 38,521,208,960 instructions # 3.04 insn per cycle - 4.775036357 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12930) (avx2: 0) (512y: 0) (512z: 0) + 11,269,898,163 cycles:u # 3.495 GHz (74.95%) + 4,697,638 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.95%) + 1,304,176,959 stalled-cycles-backend:u # 11.57% backend cycles idle (74.95%) + 38,519,332,782 instructions:u # 3.42 insn per cycle + # 0.03 stalled cycles per insn (74.95%) + 3.228176990 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12729) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.869925e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.885399e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.885399e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.093382 
sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.227037e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.229728e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.229728e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.348175 sec INFO: No Floating Point Exceptions have been reported - 5,571,627,209 cycles # 2.657 GHz - 13,607,217,607 instructions # 2.44 insn per cycle - 2.097652206 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) + 4,737,521,447 cycles:u # 3.504 GHz (74.62%) + 291,147 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.87%) + 484,828,585 stalled-cycles-backend:u # 10.23% backend cycles idle (75.15%) + 13,528,004,531 instructions:u # 2.86 insn per cycle + # 0.04 stalled cycles per insn (75.15%) + 1.354978019 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10926) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 +Avg ME (F77/C++) = 6.6266730409276836E-004 +Relative difference = 2.9563428359824236e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.920636e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.941759e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.941759e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.847928 sec -INFO: No Floating Point Exceptions have been reported - 4,911,801,030 cycles # 2.653 GHz - 12,271,296,407 instructions # 2.50 insn per cycle - 1.852091714 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.658898e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.670376e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.670376e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.472418 sec -INFO: No Floating Point Exceptions have been reported - 4,148,038,447 cycles # 1.675 GHz - 6,442,551,576 
instructions # 1.55 insn per cycle - 2.476725391 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1628) (512y: 191) (512z: 9356) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 5b5bd116a3..3d0d3b15bb 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:44:06 +DATE: 2024-05-16_16:02:08 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.065678e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.066059e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.066269e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.433623 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.861034e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.868579e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.868634e+01 ) sec^-1 +MeanMatrixElemValue = ( 
3.297256e-04 +- 2.011325e-04 ) GeV^-6 +TOTAL : 9.607645 sec INFO: No Floating Point Exceptions have been reported - 7,848,341,490 cycles # 2.847 GHz - 17,462,165,188 instructions # 2.22 insn per cycle - 2.813787845 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 29,231,885,707 cycles:u # 3.037 GHz (74.94%) + 3,405,584 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.93%) + 10,029,785 stalled-cycles-backend:u # 0.03% backend cycles idle (74.99%) + 23,193,995,344 instructions:u # 0.79 insn per cycle + # 0.00 stalled cycles per insn (74.99%) + 9.660944397 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.279047e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.280919e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.281197e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.985795 sec +EvtsPerSec[Rmb+ME] (23) = ( 
3.508374e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.521642e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.521668e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 +TOTAL : 9.126743 sec INFO: No Floating Point Exceptions have been reported - 12,317,841,072 cycles # 2.855 GHz - 29,065,647,551 instructions # 2.36 insn per cycle - 4.369250222 seconds time elapsed + 27,595,818,037 cycles:u # 3.033 GHz (74.94%) + 3,263,343 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 9,195,348 stalled-cycles-backend:u # 0.03% backend cycles idle (75.06%) + 21,903,563,579 instructions:u # 0.79 insn per cycle + # 0.00 stalled cycles per insn (75.05%) + 9.180327173 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 +Avg ME (F77/GPU) = 9.8722595284406710E-003 +Relative difference = 3.516477760164775e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.769533e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.769744e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.769744e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.808252 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.029752e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.029779e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.029779e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.133518 sec INFO: No Floating Point Exceptions have been reported - 18,798,528,369 cycles # 2.760 GHz - 53,916,630,138 instructions # 2.87 insn per cycle - 6.812355714 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) + 17,970,736,677 cycles:u # 3.498 GHz (74.93%) + 25,445,093 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.93%) + 2,114,620,206 stalled-cycles-backend:u # 11.77% backend cycles idle (74.96%) + 55,145,918,794 instructions:u # 3.07 insn per cycle + # 0.04 stalled cycles per insn (75.04%) + 5.143293014 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44874) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.539980e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.540062e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.540062e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 3.433152 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.236434e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.236573e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.236573e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.366565 sec INFO: No Floating Point Exceptions have been reported - 9,799,231,624 cycles # 2.852 GHz - 27,092,581,938 instructions # 2.76 insn per cycle - 3.437235180 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) + 8,276,572,707 cycles:u # 3.492 GHz (75.02%) + 1,944,212 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) + 776,775,939 stalled-cycles-backend:u # 9.39% backend cycles idle (75.02%) + 26,993,949,937 instructions:u # 3.26 insn per cycle + # 0.03 stalled cycles per insn (75.02%) + 2.373131093 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:97234) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.326889e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.327284e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.327284e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 1.592072 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 5.196553e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.197233e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.197233e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.021705 sec INFO: No Floating Point Exceptions have been reported - 4,220,179,984 cycles # 2.645 GHz - 9,560,887,701 instructions # 2.27 insn per cycle - 1.596045022 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) + 3,566,060,493 cycles:u # 3.479 GHz (75.04%) + 1,630,557 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.03%) + 272,703,197 stalled-cycles-backend:u # 7.65% backend cycles idle (75.03%) + 9,519,525,811 instructions:u # 2.67 insn per cycle + # 0.03 stalled cycles per insn (75.03%) + 1.028233480 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84279) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.770010e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.770613e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.770613e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.405649 sec -INFO: No Floating Point Exceptions have been reported - 3,726,923,548 cycles # 2.645 GHz - 8,484,897,516 instructions # 2.28 insn per cycle - 1.409716339 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.281739e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.282258e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.282258e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.614138 sec -INFO: No Floating Point Exceptions have been reported - 2,690,163,143 cycles # 1.663 GHz - 4,272,866,756 instructions # 1.59 insn per cycle - 1.618172762 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 1be1cfeedf..98059781c6 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,250 +1,204 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:03:52 +DATE: 2024-05-16_16:29:06 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.063403e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.064386e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.064386e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.395260 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.810560e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.811281e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.811281e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 9.602411 sec INFO: No Floating Point Exceptions have been reported - 7,755,019,436 cycles # 2.852 GHz - 17,230,726,903 instructions # 2.22 insn per cycle - 2.775067655 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 33,112,297,027 cycles:u # 3.443 GHz (75.03%) + 3,422,513 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.05%) + 10,504,866 stalled-cycles-backend:u # 0.03% backend cycles idle (75.04%) + 26,153,799,203 instructions:u # 0.79 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 9.655106468 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.263412e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.297943e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.297943e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.971807 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.527353e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.530982e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.530982e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.221264e+00 +- 1.219329e+00 ) GeV^-6 +TOTAL : 9.065470 sec INFO: No Floating Point Exceptions have been reported - 12,283,103,403 cycles # 2.855 GHz - 27,758,308,143 instructions # 2.26 insn per cycle - 4.357937638 seconds time elapsed + 31,120,573,577 cycles:u # 3.431 GHz (74.97%) + 4,565,890 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) + 60,864,496 stalled-cycles-backend:u # 0.20% backend cycles idle (74.97%) + 24,601,274,090 instructions:u # 0.79 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 9.113362546 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 +Avg ME (F77/GPU) = 9.8722595284406710E-003 +Relative difference = 3.516477760164775e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.361041e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.361236e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.361236e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.177191 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.028478e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.028506e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.028506e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.140219 sec INFO: No Floating Point Exceptions have been reported - 18,941,402,140 cycles # 2.638 GHz - 53,918,413,850 instructions # 2.85 insn per cycle - 7.181202320 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) + 17,984,375,920 cycles:u # 3.497 GHz (74.96%) + 24,790,369 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.96%) + 2,154,971,354 stalled-cycles-backend:u # 11.98% backend cycles idle (74.96%) + 55,207,882,428 instructions:u # 3.07 insn per cycle + # 0.04 stalled cycles per insn (74.98%) + 5.149502955 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44874) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.538220e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.538308e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.538308e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.437404 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.213003e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.213144e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.213144e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.393300 sec INFO: No Floating Point Exceptions have been reported - 9,826,263,323 cycles # 2.856 GHz - 27,093,421,705 instructions # 2.76 insn per cycle - 3.441586183 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) + 8,244,856,086 cycles:u # 3.440 GHz (74.98%) + 1,395,959 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.97%) + 824,554,021 stalled-cycles-backend:u # 10.00% backend cycles idle (74.97%) + 26,990,902,568 instructions:u # 3.27 insn per cycle + # 0.03 stalled cycles per insn (74.97%) + 2.400361461 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:97234) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.320877e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.321321e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.321321e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.595267 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 5.165671e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.166363e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.166363e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.027888 sec INFO: No Floating Point Exceptions have been reported - 4,226,325,559 cycles # 2.644 GHz - 9,562,000,988 instructions # 2.26 insn per cycle - 1.599357751 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) + 3,583,881,539 cycles:u # 3.475 GHz (74.81%) + 1,492,642 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.17%) + 342,634,299 stalled-cycles-backend:u # 9.56% backend cycles idle (75.18%) + 9,515,596,355 instructions:u # 2.66 insn per cycle + # 0.04 stalled cycles per insn (75.18%) + 1.034777693 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84279) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.768698e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.769266e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.769266e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.406299 sec -INFO: No Floating Point Exceptions have been reported - 3,728,202,948 cycles # 2.645 GHz - 8,485,828,873 instructions # 2.28 insn per cycle - 1.410433353 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.279021e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.279511e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.279511e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.615850 sec -INFO: No Floating Point Exceptions have been reported - 2,693,497,833 cycles # 1.663 GHz - 4,273,840,765 instructions # 1.59 insn per cycle - 1.620067219 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 3f519fda03..03f1c7030a 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:45:01 +DATE: 2024-05-16_16:03:26 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.065431e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.065819e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.066015e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.431749 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.950174e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.955143e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.955187e+01 ) sec^-1 +MeanMatrixElemValue = ( 
3.297256e-04 +- 2.011325e-04 ) GeV^-6 +TOTAL : 9.516879 sec INFO: No Floating Point Exceptions have been reported - 7,858,443,167 cycles # 2.852 GHz - 17,797,449,482 instructions # 2.26 insn per cycle - 2.810886675 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 32,591,472,838 cycles:u # 3.417 GHz (75.01%) + 3,390,992 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.04%) + 8,753,687 stalled-cycles-backend:u # 0.03% backend cycles idle (75.02%) + 25,746,238,931 instructions:u # 0.79 insn per cycle + # 0.00 stalled cycles per insn (75.02%) + 9.568277752 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.189050e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.190883e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.191132e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.007219 sec +EvtsPerSec[Rmb+ME] (23) = ( 
3.511919e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.526871e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.526891e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 +TOTAL : 9.059256 sec INFO: No Floating Point Exceptions have been reported - 12,388,147,716 cycles # 2.856 GHz - 29,572,084,158 instructions # 2.39 insn per cycle - 4.393669645 seconds time elapsed + 30,902,483,238 cycles:u # 3.416 GHz (74.99%) + 3,318,629 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.91%) + 6,492,525 stalled-cycles-backend:u # 0.02% backend cycles idle (74.92%) + 24,471,874,069 instructions:u # 0.79 insn per cycle + # 0.00 stalled cycles per insn (75.03%) + 9.106634823 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 +Avg ME (F77/GPU) = 9.8722595284406710E-003 +Relative difference = 3.516477760164775e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.555801e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.556015e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.556015e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.995892 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.022350e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.022377e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.022377e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.169976 sec INFO: No Floating Point Exceptions have been reported - 18,868,341,179 cycles # 2.696 GHz - 53,930,114,085 instructions # 2.86 insn per cycle - 6.999840535 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32063) (avx2: 0) (512y: 0) (512z: 0) + 18,013,433,306 cycles:u # 3.482 GHz (74.95%) + 25,779,908 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.95%) + 2,184,560,790 stalled-cycles-backend:u # 12.13% backend cycles idle (74.95%) + 55,161,919,505 instructions:u # 3.06 insn per cycle + # 0.04 stalled cycles per insn (75.00%) + 5.199827912 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44747) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.547899e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.547983e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.547983e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 3.415669 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.224891e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.225027e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.225027e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.378492 sec INFO: No Floating Point Exceptions have been reported - 9,762,163,827 cycles # 2.856 GHz - 27,089,755,364 instructions # 2.77 insn per cycle - 3.419663266 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96286) (avx2: 0) (512y: 0) (512z: 0) + 8,291,005,243 cycles:u # 3.481 GHz (74.81%) + 1,344,765 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.87%) + 814,344,181 stalled-cycles-backend:u # 9.82% backend cycles idle (75.04%) + 27,026,510,570 instructions:u # 3.26 insn per cycle + # 0.03 stalled cycles per insn (75.15%) + 2.385036711 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:97230) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.328520e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.328923e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.328923e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 1.591769 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 5.117019e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.117685e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.117685e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.036871 sec INFO: No Floating Point Exceptions have been reported - 4,217,350,816 cycles # 2.647 GHz - 9,560,856,496 instructions # 2.27 insn per cycle - 1.595749154 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) + 3,600,168,919 cycles:u # 3.461 GHz (74.69%) + 1,438,828 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.62%) + 295,250,423 stalled-cycles-backend:u # 8.20% backend cycles idle (74.65%) + 9,556,356,421 instructions:u # 2.65 insn per cycle + # 0.03 stalled cycles per insn (75.03%) + 1.045363830 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84249) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.765235e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.765802e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.765802e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.407302 sec -INFO: No Floating Point Exceptions have been reported - 3,737,969,275 cycles # 2.650 GHz - 8,484,674,655 instructions # 2.27 insn per cycle - 1.411406372 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.273845e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.274322e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.274322e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.617639 sec -INFO: No Floating Point Exceptions have been reported - 2,695,774,477 cycles # 1.663 GHz - 4,276,120,388 instructions # 1.59 insn per cycle - 1.621698890 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 8097294660..ef65546ab4 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:45:55 +DATE: 2024-05-16_16:04:41 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.560287e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.561087e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.561509e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.688313 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.780984e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.784144e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.784162e+02 ) sec^-1 +MeanMatrixElemValue = ( 
1.256794e-06 +- 4.775721e-07 ) GeV^-6 +TOTAL : 4.541182 sec INFO: No Floating Point Exceptions have been reported - 5,610,919,333 cycles # 2.843 GHz - 12,076,970,192 instructions # 2.15 insn per cycle - 2.032164963 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 15,513,219,865 cycles:u # 3.400 GHz (74.94%) + 2,866,959 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.92%) + 6,407,246 stalled-cycles-backend:u # 0.04% backend cycles idle (75.01%) + 12,595,649,559 instructions:u # 0.81 insn per cycle + # 0.00 stalled cycles per insn (75.05%) + 4.592083956 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.335524e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.336187e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.336332e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.920912 sec +EvtsPerSec[Rmb+ME] (23) = ( 
7.212933e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.234752e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.234835e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.930014e-02 +- 1.363569e-02 ) GeV^-6 +TOTAL : 4.726997 sec INFO: No Floating Point Exceptions have been reported - 6,262,064,127 cycles # 2.846 GHz - 13,866,454,713 instructions # 2.21 insn per cycle - 2.256561773 seconds time elapsed + 16,074,409,092 cycles:u # 3.393 GHz (75.01%) + 2,704,019 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) + 6,676,651 stalled-cycles-backend:u # 0.04% backend cycles idle (75.02%) + 13,068,602,962 instructions:u # 0.81 insn per cycle + # 0.00 stalled cycles per insn (75.06%) + 4.769822783 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849636e-03 -Avg ME (F77/GPU) = 9.8712405367667715E-003 -Relative difference = 0.0021934350433631634 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 9.855155e-03 +Avg ME (F77/GPU) = 9.8696023209835834E-003 +Relative difference = 0.0014659658811639687 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.473644e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.473896e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.473896e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.236471 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.092658e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092688e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092688e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 +TOTAL : 4.836929 sec INFO: No Floating Point Exceptions have been reported - 17,834,532,335 cycles # 2.858 GHz - 53,589,179,622 instructions # 3.00 insn per cycle - 6.240522901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) + 16,829,785,934 cycles:u # 3.477 GHz (74.94%) + 17,294,263 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.02%) + 1,652,705,918 stalled-cycles-backend:u # 9.82% backend cycles idle (75.04%) + 51,748,648,616 instructions:u # 3.07 insn per cycle + # 0.03 stalled cycles per insn (75.04%) + 4.843459172 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:27812) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087541066E-003 -Relative difference = 2.1197698286506752e-08 +Avg ME (F77/C++) = 9.8479612087414119E-003 +Relative difference = 2.1196409216982896e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.311314e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.311704e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 3.311704e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.599839 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.600756e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.601329e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.601329e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924322e-03 +- 4.918776e-03 ) GeV^-6 +TOTAL : 1.153223 sec INFO: No Floating Point Exceptions have been reported - 4,578,829,094 cycles # 2.856 GHz - 13,761,810,246 instructions # 3.01 insn per cycle - 1.603811766 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) + 4,038,703,945 cycles:u # 3.491 GHz (74.79%) + 643,845 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.11%) + 407,380,888 stalled-cycles-backend:u # 10.09% backend cycles idle (75.11%) + 13,691,102,245 instructions:u # 3.39 insn per cycle + # 0.03 stalled cycles per insn (75.11%) + 1.169033845 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:97762) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896527003E-003 -Relative difference = 3.151388282563952e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847957e-03 +Avg ME (F77/C++) = 9.8479574833965355E-003 +Relative difference = 4.9085971470122835e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.636666e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.638274e+02 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.638274e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.800488 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.018113e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.018365e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.018365e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.524433 sec INFO: No Floating Point Exceptions have been reported - 2,129,570,848 cycles # 2.649 GHz - 4,816,093,977 instructions # 2.26 insn per cycle - 0.804523713 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) + 1,791,692,598 cycles:u # 3.395 GHz (74.36%) + 910,702 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.23%) + 168,345,930 stalled-cycles-backend:u # 9.40% backend cycles idle (74.39%) + 4,819,680,819 instructions:u # 2.69 insn per cycle + # 0.03 stalled cycles per insn (75.16%) + 0.531951240 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84831) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161070551E-003 -Relative difference = 1.858823877057982e-08 +Avg ME (F77/C++) = 9.8929728161012351E-003 +Relative difference = 1.8588827066662492e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.627354e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.629498e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.629498e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.697299 sec -INFO: No Floating Point Exceptions have been reported - 1,857,131,979 cycles # 2.651 GHz - 4,273,320,598 instructions # 2.30 insn per cycle - 0.701213399 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161070551E-003 -Relative difference = 1.858823877057982e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.540089e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.542023e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.542023e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.812936 sec -INFO: No Floating Point Exceptions have been reported - 1,360,618,833 cycles # 
1.668 GHz - 2,159,125,772 instructions # 1.59 insn per cycle - 0.816997353 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982958280E-003 -Relative difference = 2.0044092642523172e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 6d352d97ac..af9847b5bc 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,250 +1,204 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_15:04:47 +DATE: 2024-05-16_16:30:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.582684e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.584567e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.584567e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.633735 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.745000e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.745330e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.745330e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.935145e-03 +- 4.929588e-03 ) GeV^-6 +TOTAL : 4.622996 sec INFO: No Floating Point Exceptions have been reported - 5,453,177,396 cycles # 2.847 GHz - 11,627,188,509 instructions # 2.13 insn per cycle - 1.971399165 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 15,867,233,439 cycles:u # 3.416 GHz (74.96%) + 2,681,610 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.03%) + 7,639,743 stalled-cycles-backend:u # 0.05% backend cycles idle (75.04%) + 12,859,486,940 instructions:u # 0.81 insn per cycle + # 0.00 stalled cycles per insn (75.01%) + 4.666755965 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.292389e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.306150e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.306150e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.932378 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.260170e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.276763e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.276763e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.258769e+00 +- 1.256832e+00 ) GeV^-6 +TOTAL : 4.745808 sec INFO: No Floating Point Exceptions have been reported - 6,313,737,946 cycles # 2.848 GHz - 13,568,150,990 instructions # 2.15 insn per cycle - 2.274068662 seconds time elapsed + 16,204,974,490 cycles:u # 3.403 GHz (75.02%) + 3,613,716 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.00%) + 47,404,694 stalled-cycles-backend:u # 0.29% backend cycles idle (74.97%) + 13,139,458,360 instructions:u # 0.81 insn per cycle + # 0.00 stalled cycles per insn (74.91%) + 4.790366579 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849636e-03 -Avg ME (F77/GPU) = 9.8712405367667715E-003 -Relative difference = 0.0021934350433631634 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 9.855155e-03 +Avg ME (F77/GPU) = 9.8696023209835834E-003 +Relative difference = 0.0014659658811639687 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.447529e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.447785e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.447785e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.255519 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.096616e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.096647e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.096647e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 +TOTAL : 4.819941 sec INFO: No Floating Point Exceptions have been reported - 17,871,844,477 cycles # 2.856 GHz - 53,590,423,890 instructions # 3.00 insn per cycle - 6.259496797 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) + 16,787,534,744 cycles:u # 3.481 GHz (74.97%) + 15,818,656 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.95%) + 1,742,758,069 stalled-cycles-backend:u # 10.38% backend cycles idle (74.95%) + 51,728,330,987 instructions:u # 3.08 insn per cycle + # 0.03 stalled cycles per insn (74.95%) + 4.826082829 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:27812) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087541066E-003 -Relative difference = 2.1197698286506752e-08 +Avg ME (F77/C++) = 9.8479612087414119E-003 +Relative difference = 2.1196409216982896e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.319002e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.319403e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.319403e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.595862 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.606640e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.607172e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.607172e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924322e-03 +- 4.918776e-03 ) GeV^-6 +TOTAL : 1.152001 sec INFO: No Floating Point Exceptions have been reported - 4,573,738,949 cycles # 2.860 GHz - 13,762,785,828 instructions # 3.01 insn per cycle - 1.599904345 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) + 4,025,017,670 cycles:u # 3.483 GHz (75.00%) + 659,426 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.08%) + 376,740,682 stalled-cycles-backend:u # 9.36% backend cycles idle (75.08%) + 13,709,516,953 instructions:u # 3.41 insn per cycle + # 0.03 stalled cycles per insn (75.08%) + 1.158459303 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:97762) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896527003E-003 -Relative difference = 3.151388282563952e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847957e-03 +Avg ME (F77/C++) = 9.8479574833965355E-003 +Relative difference = 4.9085971470122835e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.613525e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.615218e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.615218e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.804350 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.974105e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.976536e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.976536e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.534779 sec INFO: No Floating Point Exceptions have been reported - 2,139,167,872 cycles # 2.648 GHz - 4,817,111,626 instructions # 2.25 insn per cycle - 0.808508083 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) + 1,791,388,173 cycles:u # 3.329 GHz (74.77%) + 680,193 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.73%) + 160,894,614 stalled-cycles-backend:u # 8.98% backend cycles idle (74.73%) + 4,799,712,443 instructions:u # 2.68 insn per cycle + # 0.03 stalled cycles per insn (74.73%) + 0.541554035 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84831) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161070551E-003 -Relative difference = 1.858823877057982e-08 +Avg ME (F77/C++) = 9.8929728161012351E-003 +Relative difference = 1.8588827066662492e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.603124e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.605216e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.605216e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.699524 sec -INFO: No Floating Point Exceptions have been reported - 1,862,402,974 cycles # 2.650 GHz - 4,274,167,467 instructions # 2.29 insn per cycle - 0.703628099 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161070551E-003 -Relative difference = 1.858823877057982e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.580110e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.582197e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.582197e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.807828 sec -INFO: No Floating Point Exceptions have been reported - 1,354,037,726 cycles # 1.669 GHz - 2,159,114,420 instructions # 1.59 insn per cycle - 0.811949308 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982958280E-003 -Relative difference = 2.0044092642523172e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 2d3f5a3740..29dcec006d 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:46:34 +DATE: 2024-05-16_16:05:29 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.539024e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.539847e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.540280e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.689103 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.794480e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.797565e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.797585e+02 ) sec^-1 +MeanMatrixElemValue = ( 
1.256794e-06 +- 4.775721e-07 ) GeV^-6 +TOTAL : 4.575830 sec INFO: No Floating Point Exceptions have been reported - 5,627,474,622 cycles # 2.847 GHz - 11,923,534,222 instructions # 2.12 insn per cycle - 2.035228412 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 15,575,228,231 cycles:u # 3.399 GHz (75.04%) + 2,805,607 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.03%) + 5,139,054 stalled-cycles-backend:u # 0.03% backend cycles idle (74.91%) + 12,697,850,039 instructions:u # 0.82 insn per cycle + # 0.00 stalled cycles per insn (74.93%) + 4.620142781 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.303686e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.304329e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.304469e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.932451 sec +EvtsPerSec[Rmb+ME] (23) = ( 
7.257559e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.282938e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.283009e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.930014e-02 +- 1.363569e-02 ) GeV^-6 +TOTAL : 4.706697 sec INFO: No Floating Point Exceptions have been reported - 6,311,455,519 cycles # 2.848 GHz - 13,762,708,375 instructions # 2.18 insn per cycle - 2.272906437 seconds time elapsed + 15,931,475,462 cycles:u # 3.386 GHz (75.01%) + 2,790,044 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.03%) + 6,488,872 stalled-cycles-backend:u # 0.04% backend cycles idle (74.98%) + 12,976,776,317 instructions:u # 0.81 insn per cycle + # 0.00 stalled cycles per insn (74.92%) + 4.753084936 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849636e-03 -Avg ME (F77/GPU) = 9.8712405367667715E-003 -Relative difference = 0.0021934350433631634 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 9.855155e-03 +Avg ME (F77/GPU) = 9.8696023209835834E-003 +Relative difference = 0.0014659658811639687 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.477402e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.477656e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.477656e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.233047 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.068990e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.069020e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.069020e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 +TOTAL : 4.949661 sec INFO: No Floating Point Exceptions have been reported - 17,803,580,317 cycles # 2.855 GHz - 53,580,069,164 instructions # 3.01 insn per cycle - 6.237030677 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) + 16,872,539,803 cycles:u # 3.406 GHz (74.97%) + 19,348,489 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.97%) + 1,719,375,862 stalled-cycles-backend:u # 10.19% backend cycles idle (74.97%) + 51,753,051,978 instructions:u # 3.07 insn per cycle + # 0.03 stalled cycles per insn (74.97%) + 4.957569698 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:27678) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087582491E-003 -Relative difference = 2.1198118933954545e-08 +Avg ME (F77/C++) = 9.8479612087396841E-003 +Relative difference = 2.119623377106246e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.307415e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.307805e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 3.307805e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.601039 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.251516e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.251999e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.251999e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924322e-03 +- 4.918776e-03 ) GeV^-6 +TOTAL : 1.246488 sec INFO: No Floating Point Exceptions have been reported - 4,572,009,891 cycles # 2.850 GHz - 13,755,353,111 instructions # 3.01 insn per cycle - 1.605120576 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) + 4,011,784,105 cycles:u # 3.208 GHz (75.07%) + 772,500 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.05%) + 398,973,911 stalled-cycles-backend:u # 9.95% backend cycles idle (75.05%) + 13,716,235,420 instructions:u # 3.42 insn per cycle + # 0.03 stalled cycles per insn (75.05%) + 1.263384384 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:97728) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896225560E-003 -Relative difference = 3.151694379513441e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847957e-03 +Avg ME (F77/C++) = 9.8479574833965355E-003 +Relative difference = 4.9085971470122835e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.585961e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.587683e+02 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.587683e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.806516 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.021642e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.021902e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.021902e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.522051 sec INFO: No Floating Point Exceptions have been reported - 2,141,220,761 cycles # 2.644 GHz - 4,818,439,860 instructions # 2.25 insn per cycle - 0.810543510 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) + 1,798,107,812 cycles:u # 3.421 GHz (74.15%) + 427,486 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.44%) + 146,647,209 stalled-cycles-backend:u # 8.16% backend cycles idle (75.20%) + 4,777,320,458 instructions:u # 2.66 insn per cycle + # 0.03 stalled cycles per insn (75.65%) + 0.528489057 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84793) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161070967E-003 -Relative difference = 1.8588234562202478e-08 +Avg ME (F77/C++) = 9.8929728161012351E-003 +Relative difference = 1.8588827066662492e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.583228e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.585349e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.585349e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.701072 sec -INFO: No Floating Point Exceptions have been reported - 1,870,651,613 cycles # 2.656 GHz - 4,275,203,774 instructions # 2.29 insn per cycle - 0.705038579 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161070967E-003 -Relative difference = 1.8588234562202478e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.570140e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.572065e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.572065e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.808794 sec -INFO: No Floating Point Exceptions have been reported - 1,356,929,556 cycles # 
1.671 GHz - 2,164,613,956 instructions # 1.60 insn per cycle - 0.812781092 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3487) (512y: 34) (512z:79499) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982955140E-003 -Relative difference = 2.0044060904369713e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index dfab5870bc..7994fb35b0 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:47:13 +DATE: 2024-05-16_16:06:18 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.689455e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.689959e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.690212e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.179203 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.408577e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.414468e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.414507e+01 ) sec^-1 +MeanMatrixElemValue = ( 
3.297256e-04 +- 2.011325e-04 ) GeV^-6 +TOTAL : 9.956495 sec INFO: No Floating Point Exceptions have been reported - 7,126,539,551 cycles # 2.849 GHz - 15,807,759,758 instructions # 2.22 insn per cycle - 2.559686036 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 33,567,482,139 cycles:u # 3.371 GHz (74.95%) + 3,460,523 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) + 6,543,761 stalled-cycles-backend:u # 0.02% backend cycles idle (75.03%) + 26,527,364,853 instructions:u # 0.79 insn per cycle + # 0.00 stalled cycles per insn (75.07%) + 10.022352268 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.107889e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.108154e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.108192e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.424856 sec +EvtsPerSec[Rmb+ME] (23) = ( 
3.292206e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.295533e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.295553e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 +TOTAL : 9.372690 sec INFO: No Floating Point Exceptions have been reported - 10,729,635,772 cycles # 2.852 GHz - 25,204,058,412 instructions # 2.35 insn per cycle - 3.820430433 seconds time elapsed + 31,571,554,884 cycles:u # 3.365 GHz (74.99%) + 3,356,337 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 6,949,101 stalled-cycles-backend:u # 0.02% backend cycles idle (75.02%) + 24,987,178,999 instructions:u # 0.79 insn per cycle + # 0.00 stalled cycles per insn (75.03%) + 9.419617214 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 +Avg ME (F77/GPU) = 9.8722599015656533E-003 +Relative difference = 3.138524921691728e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.303415e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.303607e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.303607e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.238072 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.913091e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.913361e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.913361e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.328063 sec INFO: No Floating Point Exceptions have been reported - 19,150,406,884 cycles # 2.645 GHz - 54,154,394,762 instructions # 2.83 insn per cycle - 7.242308052 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32067) (avx2: 0) (512y: 0) (512z: 0) + 18,090,484,403 cycles:u # 3.393 GHz (74.95%) + 31,419,411 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.94%) + 2,281,933,190 stalled-cycles-backend:u # 12.61% backend cycles idle (74.94%) + 55,395,326,342 instructions:u # 3.06 insn per cycle + # 0.04 stalled cycles per insn (74.99%) + 5.335013340 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44898) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.497154e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.497235e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.497235e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 
-TOTAL : 3.531757 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.319058e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.319196e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.319196e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.281605 sec INFO: No Floating Point Exceptions have been reported - 9,343,938,644 cycles # 2.643 GHz - 26,158,830,842 instructions # 2.80 insn per cycle - 3.535758073 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96007) (avx2: 0) (512y: 0) (512z: 0) + 7,899,102,195 cycles:u # 3.457 GHz (74.80%) + 583,779 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.87%) + 790,863,753 stalled-cycles-backend:u # 10.01% backend cycles idle (75.04%) + 25,844,357,323 instructions:u # 3.27 insn per cycle + # 0.03 stalled cycles per insn (75.15%) + 2.288299376 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96804) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.453828e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.454275e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.454275e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL 
: 1.534198 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 5.387508e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.388294e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.388294e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.986063 sec INFO: No Floating Point Exceptions have been reported - 4,069,691,610 cycles # 2.648 GHz - 9,228,168,046 instructions # 2.27 insn per cycle - 1.538179495 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) + 3,393,191,576 cycles:u # 3.428 GHz (75.01%) + 1,102,520 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.95%) + 272,550,314 stalled-cycles-backend:u # 8.03% backend cycles idle (74.95%) + 9,067,112,205 instructions:u # 2.67 insn per cycle + # 0.03 stalled cycles per insn (74.95%) + 0.992852344 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83820) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.986335e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.986931e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.986931e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.329366 sec -INFO: No Floating Point Exceptions have been reported - 3,528,184,184 cycles # 2.647 GHz - 8,174,614,993 instructions # 2.32 insn per cycle - 1.333451918 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.367967e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.368468e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.368468e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.573046 sec -INFO: No Floating Point Exceptions have been reported - 2,618,946,865 cycles # 1.661 GHz - 4,154,480,374 instructions # 1.59 insn per cycle - 1.577054610 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2046) (512y: 93) (512z:78760) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 3ddfb4805b..8c8684e3d6 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building 
in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_14:48:06 +DATE: 2024-05-16_16:07:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.679279e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.679786e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.680054e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.178048 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.512212e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.517450e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.517493e+01 ) sec^-1 +MeanMatrixElemValue = ( 
3.297256e-04 +- 2.011325e-04 ) GeV^-6 +TOTAL : 9.863561 sec INFO: No Floating Point Exceptions have been reported - 7,134,880,740 cycles # 2.851 GHz - 15,434,594,866 instructions # 2.16 insn per cycle - 2.558453633 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 33,411,525,809 cycles:u # 3.381 GHz (75.00%) + 3,395,895 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) + 6,487,254 stalled-cycles-backend:u # 0.02% backend cycles idle (75.05%) + 26,457,448,109 instructions:u # 0.79 insn per cycle + # 0.00 stalled cycles per insn (75.05%) + 9.914327052 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.104221e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.104483e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.104525e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.411600 sec +EvtsPerSec[Rmb+ME] (23) = ( 
3.303149e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.306076e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.306096e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 +TOTAL : 9.350448 sec INFO: No Floating Point Exceptions have been reported - 10,672,973,002 cycles # 2.855 GHz - 24,521,846,399 instructions # 2.30 insn per cycle - 3.794724712 seconds time elapsed + 30,640,968,158 cycles:u # 3.271 GHz (74.99%) + 3,457,860 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) + 7,072,147 stalled-cycles-backend:u # 0.02% backend cycles idle (74.98%) + 24,241,249,676 instructions:u # 0.79 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 9.396296167 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 +Avg ME (F77/GPU) = 9.8722599015656533E-003 +Relative difference = 3.138524921691728e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.893217e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.893439e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.893439e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.694276 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.991963e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.992233e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.992233e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.286309 sec INFO: No Floating Point Exceptions have been reported - 19,121,414,788 cycles # 2.855 GHz - 54,156,458,090 instructions # 2.83 insn per cycle - 6.698138270 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32244) (avx2: 0) (512y: 0) (512z: 0) + 18,200,100,367 cycles:u # 3.441 GHz (74.92%) + 31,693,487 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.99%) + 2,173,708,240 stalled-cycles-backend:u # 11.94% backend cycles idle (75.05%) + 55,300,519,357 instructions:u # 3.04 insn per cycle + # 0.04 stalled cycles per insn (75.05%) + 5.293712357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44806) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.495395e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.495480e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.495480e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 
-TOTAL : 3.535528 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.042520e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.042641e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.042641e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.590307 sec INFO: No Floating Point Exceptions have been reported - 9,391,010,006 cycles # 2.654 GHz - 26,079,707,862 instructions # 2.78 insn per cycle - 3.539600596 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95901) (avx2: 0) (512y: 0) (512z: 0) + 7,993,267,480 cycles:u # 3.081 GHz (75.02%) + 2,354,729 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.02%) + 788,725,170 stalled-cycles-backend:u # 9.87% backend cycles idle (75.02%) + 25,756,603,572 instructions:u # 3.22 insn per cycle + # 0.03 stalled cycles per insn (75.02%) + 2.597135056 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96765) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.518532e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.518969e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.518969e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL 
: 1.505616 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.797254e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.797903e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.797903e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.106761 sec INFO: No Floating Point Exceptions have been reported - 4,001,150,405 cycles # 2.652 GHz - 9,212,868,850 instructions # 2.30 insn per cycle - 1.509560632 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) + 3,391,654,110 cycles:u # 3.054 GHz (74.79%) + 718,366 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.79%) + 300,486,852 stalled-cycles-backend:u # 8.86% backend cycles idle (74.79%) + 9,091,654,928 instructions:u # 2.68 insn per cycle + # 0.03 stalled cycles per insn (75.10%) + 1.113615121 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83378) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.985927e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.986486e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.986486e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.329709 sec -INFO: No Floating Point Exceptions have been reported - 3,529,740,112 cycles # 2.648 GHz - 8,168,252,869 instructions # 2.31 insn per cycle - 1.333651402 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.359545e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.360045e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.360045e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.576721 sec -INFO: No Floating Point Exceptions have been reported - 2,623,702,370 cycles # 1.660 GHz - 4,153,356,804 instructions # 1.58 insn per cycle - 1.580839869 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1493) (512y: 175) (512z:78776) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 38bc670a18..188bf1165b 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,233 +1,70 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:42:48 +DATE: 2024-05-16_16:01:39 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.927387e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.315718e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.634653e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.452451 sec -INFO: No Floating Point Exceptions have been reported - 1,879,085,625 cycles # 2.815 GHz - 2,632,406,951 instructions # 1.40 insn per cycle - 0.724903288 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.675522e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.208336e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.557322e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.534593 sec -INFO: No Floating Point Exceptions have been reported - 2,165,913,457 cycles # 2.812 GHz - 3,139,398,529 instructions # 1.45 insn per cycle - 0.827804422 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.011560e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.033153e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.033153e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.642980 sec -INFO: No Floating Point Exceptions have been reported - 4,710,402,412 cycles # 2.861 GHz 
- 13,462,495,012 instructions # 2.86 insn per cycle - 1.647108070 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499484 -Relative difference = 5.286896509487005e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.839775e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.910542e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910542e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.913158 sec -INFO: No Floating Point Exceptions have been reported - 2,620,816,977 cycles # 2.859 GHz - 7,551,970,333 instructions # 2.88 insn per cycle - 0.917276709 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499478 -Relative difference = 5.28689651338321e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.970408e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.156692e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.156692e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.574933 sec -INFO: No Floating Point Exceptions have been reported - 1,480,758,822 cycles # 2.560 GHz - 3,119,703,419 instructions # 2.11 insn per cycle - 0.579132992 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492595 -Relative difference = 5.286901344678233e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.428889e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.682851e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.682851e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.501020 sec -INFO: No Floating Point Exceptions have been reported - 1,347,520,276 cycles # 2.670 GHz - 2,981,434,055 instructions # 2.21 insn per cycle - 0.505363497 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492595 -Relative difference = 5.286901344678233e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 713,963,187 cycles:u # 2.170 GHz (73.28%) + 2,042,187 stalled-cycles-frontend:u # 0.29% frontend cycles idle (76.83%) + 5,187,442 stalled-cycles-backend:u # 0.73% backend cycles idle (75.80%) + 1,231,009,265 instructions:u # 1.72 insn per cycle + # 0.00 stalled cycles per insn (76.48%) + 0.407439823 seconds time elapsed +......................................................................... 
+runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.241546e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.347861e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.347861e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.756225 sec -INFO: No Floating Point Exceptions have been reported - 1,330,320,612 cycles # 1.751 GHz - 1,953,406,018 instructions # 1.47 insn per cycle - 0.760489864 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 896,221,881 cycles:u # 2.079 GHz (75.90%) + 2,055,660 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.90%) + 5,691,868 stalled-cycles-backend:u # 0.64% backend cycles idle (75.92%) + 1,375,853,093 instructions:u # 1.54 insn per cycle + # 0.00 stalled cycles per insn (75.91%) + 0.493613360 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492595 -Relative difference = 5.286901344678233e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe +Memory access fault by GPU node-4 (Agent handle: 0x6026a0) on address 0x14f44de7a000. Reason: Unknown. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 6f141963c0..e662317ff0 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,250 +1,78 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_15:02:27 +DATE: 2024-05-16_16:28:10 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.428295e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.103056e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.103056e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.480636 sec -INFO: No Floating Point Exceptions have been reported - 1,959,891,585 cycles # 2.818 GHz - 2,927,619,706 instructions # 1.49 insn per cycle - 0.752080667 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 769,673,769 cycles:u # 2.223 GHz (74.48%) + 2,560,023 stalled-cycles-frontend:u # 0.33% frontend cycles idle (74.72%) + 28,792,356 stalled-cycles-backend:u # 3.74% backend cycles idle (75.65%) + 1,230,708,006 instructions:u # 1.60 insn per cycle + # 0.02 stalled cycles per insn (77.63%) + 0.415541760 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.157968e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.371122e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.371122e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.764001 sec -INFO: No Floating Point Exceptions have been reported - 2,873,640,599 cycles # 2.829 GHz - 4,407,079,803 instructions # 1.53 insn per cycle - 1.073816079 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.008642e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.030371e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.030371e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.654853 sec -INFO: No Floating Point Exceptions have been reported - 4,747,034,662 cycles # 2.862 GHz - 13,469,694,473 instructions # 2.84 insn per cycle - 1.659302078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499484 -Relative difference = 5.286896509487005e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.820348e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.892211e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.892211e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.930951 sec -INFO: No Floating Point Exceptions have been reported - 2,665,977,292 cycles # 2.852 GHz - 7,601,998,240 instructions # 2.85 insn per cycle - 0.935555380 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
+/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 2,975,700,076 cycles:u # 2.756 GHz (75.87%) + 16,209,947 stalled-cycles-frontend:u # 0.54% frontend cycles idle (75.56%) + 848,638,694 stalled-cycles-backend:u # 28.52% backend cycles idle (75.54%) + 3,194,109,069 instructions:u # 1.07 insn per cycle + # 0.27 stalled cycles per insn (75.52%) + 1.140659477 seconds time elapsed ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499478 -Relative difference = 5.28689651338321e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.068620e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.272960e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.272960e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.564348 sec -INFO: No Floating Point Exceptions have been reported - 1,513,664,570 cycles # 2.669 GHz - 3,168,463,518 instructions # 2.09 insn per cycle - 0.568761168 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492595 -Relative difference = 5.286901344678233e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.408389e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.655047e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.655047e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.511104 sec -INFO: No Floating Point Exceptions have been reported - 1,377,582,779 cycles # 2.675 GHz - 3,030,644,125 instructions # 2.20 insn per cycle - 0.515560343 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492595 -Relative difference = 5.286901344678233e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.221799e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.329402e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.329402e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.769820 sec -INFO: No Floating Point Exceptions have been reported - 1,366,102,927 cycles # 1.765 GHz - 1,991,071,116 instructions # 1.46 insn per cycle - 0.774386560 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492595 -Relative difference = 5.286901344678233e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe +Memory access fault by GPU node-4 (Agent handle: 0x6026a0) on address 0x14c1f811a000. Reason: Unknown. 
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 25b8d3c885..d10e43658c 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,233 +1,70 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:43:01 +DATE: 2024-05-16_16:01:48 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.907170e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.197971e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.504611e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.455771 sec -INFO: No Floating Point Exceptions have been reported - 1,881,865,516 cycles # 2.813 
GHz - 2,669,782,801 instructions # 1.42 insn per cycle - 0.727786761 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.641992e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.081273e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.416654e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.532301 sec -INFO: No Floating Point Exceptions have been reported - 2,167,822,822 cycles # 2.823 GHz - 3,120,353,321 instructions # 1.44 insn per cycle - 0.825343283 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.007784e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.029112e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.029112e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.648995 sec -INFO: No Floating Point Exceptions have been reported - 4,725,323,359 cycles # 2.860 GHz 
- 13,457,369,308 instructions # 2.85 insn per cycle - 1.653142214 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499484 -Relative difference = 5.286896509487005e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.833913e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.904030e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.904030e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.915889 sec -INFO: No Floating Point Exceptions have been reported - 2,628,184,982 cycles # 2.858 GHz - 7,551,273,836 instructions # 2.87 insn per cycle - 0.920086997 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499478 -Relative difference = 5.28689651338321e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.116183e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.320457e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.320457e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.548674 sec -INFO: No Floating Point Exceptions have been reported - 1,476,841,675 cycles # 2.675 GHz - 3,117,924,257 instructions # 2.11 insn per cycle - 0.552738607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492595 -Relative difference = 5.286901344678233e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.456247e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.706124e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.706124e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.496887 sec -INFO: No Floating Point Exceptions have been reported - 1,340,057,166 cycles # 2.677 GHz - 2,978,732,248 instructions # 2.22 insn per cycle - 0.501058940 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492595 -Relative difference = 5.286901344678233e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault + 722,050,915 cycles:u # 2.167 GHz (72.82%) + 2,092,789 stalled-cycles-frontend:u # 0.29% frontend cycles idle (77.11%) + 4,947,015 stalled-cycles-backend:u # 0.69% backend cycles idle (76.05%) + 1,226,010,153 instructions:u # 1.70 insn per cycle + # 0.00 stalled cycles per insn (76.56%) + 0.389591226 seconds time elapsed +......................................................................... 
+runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.241283e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.347840e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.347840e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.757118 sec -INFO: No Floating Point Exceptions have been reported - 1,329,966,748 cycles # 1.749 GHz - 1,951,787,640 instructions # 1.47 insn per cycle - 0.761356492 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault + 923,029,168 cycles:u # 2.154 GHz (74.77%) + 2,266,174 stalled-cycles-frontend:u # 0.25% frontend cycles idle (72.08%) + 4,971,550 stalled-cycles-backend:u # 0.54% backend cycles idle (74.17%) + 1,357,666,646 instructions:u # 1.47 insn per cycle + # 0.00 stalled cycles per insn (76.61%) + 0.481792171 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492595 -Relative difference = 5.286901344678233e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/runTest_hip.exe +Memory access fault by GPU node-4 (Agent handle: 0x6015f0) on address 0x14c2a6b19000. Reason: Unknown. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 88eaa7d80d..6f72dff6a1 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,233 +1,70 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:43:15 +DATE: 2024-05-16_16:01:51 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.867335e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.223690e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.343650e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.450195 sec -INFO: No Floating Point Exceptions have been reported - 1,886,543,936 cycles # 2.814 GHz - 2,627,629,254 instructions # 1.39 insn per cycle - 0.729554150 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.183442e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.842494e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.962990e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.482910 sec -INFO: No Floating Point Exceptions have been reported - 1,994,696,147 cycles # 2.812 GHz - 2,828,466,882 instructions # 1.42 insn per cycle - 0.766894337 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247488790821983 -Relative difference = 0.00036713209996037764 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.069532e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.093791e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.093791e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.553490 sec -INFO: No Floating Point Exceptions have been reported - 4,455,366,971 cycles # 2.862 GHz 
- 13,047,769,817 instructions # 2.93 insn per cycle - 1.557630020 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246857540270419 -Relative difference = 1.7265064590569047e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.869084e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.052765e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.052765e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.592151 sec -INFO: No Floating Point Exceptions have been reported - 1,701,146,602 cycles # 2.856 GHz - 4,512,165,265 instructions # 2.65 insn per cycle - 0.596248693 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246859631675157 -Relative difference = 2.5853054135974944e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.609679e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.315056e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.315056e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.313413 sec -INFO: No Floating Point Exceptions have been reported - 850,737,642 cycles # 2.684 GHz - 1,895,945,890 instructions # 2.23 insn per cycle - 0.317546154 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247489318272599 -Relative difference = 4.784894739577799e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.973396e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.785303e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.785303e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.295859 sec -INFO: No Floating Point Exceptions have been reported - 801,819,935 cycles # 2.679 GHz - 1,819,229,849 instructions # 2.27 insn per cycle - 0.299944027 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247489318272599 -Relative difference = 4.784894739577799e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 739,138,184 cycles:u # 2.272 GHz (70.20%) + 2,203,118 stalled-cycles-frontend:u # 0.30% frontend cycles idle (73.01%) + 4,809,093 stalled-cycles-backend:u # 0.65% backend cycles idle (75.72%) + 1,197,836,797 instructions:u # 1.62 insn per cycle + # 0.00 stalled cycles per insn (75.47%) + 0.379451473 seconds time elapsed +......................................................................... 
+runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.354956e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.770974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.770974e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.400076 sec -INFO: No Floating Point Exceptions have been reported - 733,009,701 cycles # 1.817 GHz - 1,304,250,799 instructions # 1.78 insn per cycle - 0.404216975 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 806,619,492 cycles:u # 2.156 GHz (75.32%) + 2,155,748 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.43%) + 5,351,844 stalled-cycles-backend:u # 0.66% backend cycles idle (75.37%) + 1,293,687,319 instructions:u # 1.60 insn per cycle + # 0.00 stalled cycles per insn (74.61%) + 0.410509600 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247489383243206 -Relative difference = 4.32888033512879e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe +Memory access fault by GPU node-4 (Agent handle: 0x5ee6a0) on address 0x148dbe8a5000. Reason: Unknown. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index b62a8a0309..b1ad2e06ab 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,250 +1,78 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_15:02:40 +DATE: 2024-05-16_16:28:15 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.337579e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.030007e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.030007e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 -TOTAL : 0.462122 sec -INFO: No Floating Point Exceptions have been reported - 1,901,719,201 cycles # 2.816 GHz - 2,811,032,752 instructions # 1.48 insn per cycle - 0.731978994 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 744,907,152 cycles:u # 2.134 GHz (76.47%) + 2,619,285 stalled-cycles-frontend:u # 0.35% frontend cycles idle (75.14%) + 21,668,084 stalled-cycles-backend:u # 2.91% backend cycles idle (74.88%) + 1,202,893,963 instructions:u # 1.61 insn per cycle + # 0.02 stalled cycles per insn (75.25%) + 0.428107332 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.907303e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.566216e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.566216e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 -TOTAL : 0.637098 sec -INFO: No Floating Point Exceptions have been reported - 2,459,040,544 cycles # 2.824 GHz - 3,715,271,980 instructions # 1.51 insn per cycle - 0.927773682 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247488790821983 -Relative difference = 0.00036713209996037764 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.068152e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092782e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092782e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.559403 sec -INFO: No Floating Point Exceptions have been reported - 4,475,912,555 cycles # 2.864 GHz - 13,052,235,712 instructions # 2.92 insn per cycle - 1.563691095 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246857540270419 -Relative difference = 1.7265064590569047e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.856394e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.039884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.039884e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.599599 sec -INFO: No Floating Point Exceptions have been reported - 1,723,185,860 cycles # 2.856 GHz - 4,560,285,596 instructions # 2.65 insn per cycle - 0.603925442 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
+/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 2,924,404,172 cycles:u # 2.888 GHz (73.83%) + 17,608,008 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.79%) + 854,434,104 stalled-cycles-backend:u # 29.22% backend cycles idle (75.54%) + 3,082,375,053 instructions:u # 1.05 insn per cycle + # 0.28 stalled cycles per insn (75.24%) + 1.041915815 seconds time elapsed ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246859631675157 -Relative difference = 2.5853054135974944e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.545801e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.241062e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.241062e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.321141 sec -INFO: No Floating Point Exceptions have been reported - 871,513,310 cycles # 2.683 GHz - 1,932,959,243 instructions # 2.22 insn per cycle - 0.325378385 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247489318272599 -Relative difference = 4.784894739577799e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.891127e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.696072e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.696072e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.304221 sec -INFO: No Floating Point Exceptions have been reported - 825,995,486 cycles # 2.683 GHz - 1,856,161,781 instructions # 2.25 insn per cycle - 0.308416114 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247489318272599 -Relative difference = 4.784894739577799e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.307702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.720545e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.720545e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.408443 sec -INFO: No Floating Point Exceptions have been reported - 755,445,387 cycles # 1.833 GHz - 1,345,989,570 instructions # 1.78 insn per cycle - 0.412779323 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247489383243206 -Relative difference = 4.32888033512879e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe +Memory access fault by GPU node-4 (Agent handle: 0x5ee6a0) on address 0x1531a427d000. Reason: Unknown. 
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index f782cd39a5..3fa0b52721 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,233 +1,70 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:43:27 +DATE: 2024-05-16_16:01:55 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.882997e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.225822e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.344729e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.447687 sec -INFO: No Floating Point Exceptions have been reported - 1,891,564,072 cycles # 2.816 
GHz - 2,660,739,786 instructions # 1.41 insn per cycle - 0.729746219 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.107850e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.805980e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.921999e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.485790 sec -INFO: No Floating Point Exceptions have been reported - 1,996,906,378 cycles # 2.807 GHz - 2,867,667,096 instructions # 1.44 insn per cycle - 0.769333150 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247488790821983 -Relative difference = 0.00036713209996037764 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.069812e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.094168e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.094168e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.553096 sec -INFO: No Floating Point Exceptions have been reported - 4,454,505,799 cycles # 2.862 GHz 
- 13,029,391,838 instructions # 2.92 insn per cycle - 1.557292510 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246857540270419 -Relative difference = 1.7265064590569047e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.876347e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.060596e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.060596e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.590255 sec -INFO: No Floating Point Exceptions have been reported - 1,693,495,983 cycles # 2.852 GHz - 4,508,141,451 instructions # 2.66 insn per cycle - 0.594398488 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 3588) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246859631675157 -Relative difference = 2.5853054135974944e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.574680e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.273652e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.273652e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.315080 sec -INFO: No Floating Point Exceptions have been reported - 851,359,645 cycles # 2.672 GHz - 1,893,112,803 instructions # 2.22 insn per cycle - 0.319204462 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247489318272599 -Relative difference = 4.784894739577799e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.978403e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.785893e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.785893e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.295093 sec -INFO: No Floating Point Exceptions have been reported - 799,712,323 cycles # 2.678 GHz - 1,814,979,638 instructions # 2.27 insn per cycle - 0.299228201 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247489318272599 -Relative difference = 4.784894739577799e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault + 722,785,633 cycles:u # 1.960 GHz (75.07%) + 2,293,673 stalled-cycles-frontend:u # 0.32% frontend cycles idle (70.85%) + 6,301,391 stalled-cycles-backend:u # 0.87% backend cycles idle (73.24%) + 1,209,125,825 instructions:u # 1.67 insn per cycle + # 0.01 stalled cycles per insn (77.15%) + 0.415465911 seconds time elapsed +......................................................................... 
+runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.317992e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.737735e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.737735e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.402763 sec -INFO: No Floating Point Exceptions have been reported - 736,511,578 cycles # 1.812 GHz - 1,302,115,541 instructions # 1.77 insn per cycle - 0.406867415 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1936) (512y: 32) (512z: 2382) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault + 789,672,018 cycles:u # 1.841 GHz (75.32%) + 2,108,026 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.87%) + 5,194,088 stalled-cycles-backend:u # 0.66% backend cycles idle (76.73%) + 1,297,221,444 instructions:u # 1.64 insn per cycle + # 0.00 stalled cycles per insn (75.85%) + 0.514680463 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247489383243206 -Relative difference = 4.32888033512879e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/runTest_hip.exe +Memory access fault by GPU node-4 (Agent handle: 0x5ed5f0) on address 0x14ca6eee4000. Reason: Unknown. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 265a4a7626..cb57e418df 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,233 +1,70 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:43:39 +DATE: 2024-05-16_16:01:59 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.940149e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.336219e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.662963e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.452840 sec -INFO: No Floating Point Exceptions have been reported - 1,880,363,198 cycles # 2.808 GHz - 2,677,692,820 instructions # 1.42 insn per cycle - 0.726161506 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.684159e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.236315e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.588311e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.530728 sec -INFO: No Floating Point Exceptions have been reported - 2,164,642,485 cycles # 2.821 GHz - 3,145,530,012 instructions # 1.45 insn per cycle - 0.824333778 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482577104625 -Relative difference = 5.209967070245855e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.003476e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.024445e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.024445e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.655769 sec -INFO: No Floating Point Exceptions have been reported - 4,745,491,139 cycles # 2.860 GHz 
- 13,466,039,366 instructions # 2.84 insn per cycle - 1.659848552 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.849332e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.920343e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.920343e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.908133 sec -INFO: No Floating Point Exceptions have been reported - 2,605,721,632 cycles # 2.858 GHz - 7,384,650,569 instructions # 2.83 insn per cycle - 0.912227813 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.133010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.340359e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.340359e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.546033 sec -INFO: No Floating Point Exceptions have been reported - 1,469,888,298 cycles # 2.674 GHz - 3,055,461,884 instructions # 2.08 insn per cycle - 0.550169150 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.544324e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.807645e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.807645e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.485213 sec -INFO: No Floating Point Exceptions have been reported - 1,307,959,720 cycles # 2.676 GHz - 2,930,377,532 instructions # 2.24 insn per cycle - 0.489382978 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault + 708,194,008 cycles:u # 1.920 GHz (75.96%) + 2,073,803 stalled-cycles-frontend:u # 0.29% frontend cycles idle (76.55%) + 6,021,706 stalled-cycles-backend:u # 0.85% backend cycles idle (74.51%) + 1,268,817,780 instructions:u # 1.79 insn per cycle + # 0.00 stalled cycles per insn (72.13%) + 0.408351550 seconds time elapsed +......................................................................... 
+runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.172350e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.272043e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.272043e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.779592 sec -INFO: No Floating Point Exceptions have been reported - 1,368,592,699 cycles # 1.747 GHz - 1,969,378,714 instructions # 1.44 insn per cycle - 0.783958712 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault + 905,656,386 cycles:u # 1.899 GHz (74.39%) + 2,346,083 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.27%) + 5,205,158 stalled-cycles-backend:u # 0.57% backend cycles idle (73.59%) + 1,403,918,632 instructions:u # 1.55 insn per cycle + # 0.00 stalled cycles per insn (73.04%) + 0.502193913 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/runTest_hip.exe +Memory access fault by GPU node-4 (Agent handle: 0x6026a0) on address 0x14b91ff9a000. Reason: Unknown. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 84e80111cc..437a700155 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,233 +1,70 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_14:43:53 +DATE: 2024-05-16_16:02:03 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.890956e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.181054e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.513059e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.453973 sec -INFO: No Floating Point Exceptions have been reported - 1,876,167,670 cycles # 2.808 GHz - 2,662,885,558 instructions # 1.42 insn per cycle - 0.726739496 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.642147e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.081360e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.416296e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.533908 sec -INFO: No Floating Point Exceptions have been reported - 2,163,893,097 cycles # 2.818 GHz - 3,132,561,280 instructions # 1.45 insn per cycle - 0.826852700 seconds time elapsed -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482577104625 -Relative difference = 5.209967070245855e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.007176e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.028375e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.028375e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.650928 sec -INFO: No Floating Point Exceptions have been reported - 4,733,031,285 cycles # 2.861 GHz 
- 13,451,191,160 instructions # 2.84 insn per cycle - 1.655053441 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.847760e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.919370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.919370e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.909156 sec -INFO: No Floating Point Exceptions have been reported - 2,606,818,939 cycles # 2.857 GHz - 7,388,977,556 instructions # 2.83 insn per cycle - 0.913243210 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.915489e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.093943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.093943e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.585235 sec -INFO: No Floating Point Exceptions have been reported - 1,469,957,671 cycles # 2.496 GHz - 3,055,084,256 instructions # 2.08 insn per cycle - 0.589443028 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.535422e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.797003e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.797003e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.486063 sec -INFO: No Floating Point Exceptions have been reported - 1,306,700,125 cycles # 2.669 GHz - 2,930,583,524 instructions # 2.24 insn per cycle - 0.490171496 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault + 705,570,664 cycles:u # 1.900 GHz (76.20%) + 2,241,490 stalled-cycles-frontend:u # 0.32% frontend cycles idle (72.84%) + 6,208,242 stalled-cycles-backend:u # 0.88% backend cycles idle (71.13%) + 1,273,792,982 instructions:u # 1.81 insn per cycle + # 0.00 stalled cycles per insn (74.02%) + 0.428782940 seconds time elapsed +......................................................................... 
+runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.173668e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.273111e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.273111e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.778991 sec -INFO: No Floating Point Exceptions have been reported - 1,367,910,665 cycles # 1.749 GHz - 1,969,371,455 instructions # 1.44 insn per cycle - 0.783143035 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault + 901,184,138 cycles:u # 1.897 GHz (74.40%) + 2,252,378 stalled-cycles-frontend:u # 0.25% frontend cycles idle (73.94%) + 5,221,412 stalled-cycles-backend:u # 0.58% backend cycles idle (73.24%) + 1,363,461,109 instructions:u # 1.51 insn per cycle + # 0.00 stalled cycles per insn (75.20%) + 0.512631423 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/runTest_hip.exe +Memory access fault by GPU node-4 (Agent handle: 0x6015f0) on address 0x153235909000. Reason: Unknown. diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 8af6873425..82a9bd67cc 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:20:33 +DATE: 2024-05-16_16:48:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.588343e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.081541e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.176224e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.528808 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.585455e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.456806e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.498470e+07 ) sec^-1 +MeanMatrixElemValue = ( 
7.088120e+00 +- 1.629041e-01 ) GeV^0 +TOTAL : 0.397875 sec INFO: No Floating Point Exceptions have been reported - 2,192,111,166 cycles # 2.821 GHz - 3,135,008,318 instructions # 1.43 insn per cycle - 0.833908791 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 943,758,789 cycles:u # 2.286 GHz (73.85%) + 2,179,631 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.36%) + 5,460,392 stalled-cycles-backend:u # 0.58% backend cycles idle (74.84%) + 1,513,257,999 instructions:u # 1.60 insn per cycle + # 0.00 stalled cycles per insn (74.84%) + 0.452944684 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110280 -Relative difference = 2.1036162329561614e-07 +Avg ME (F77/GPU) = 4.3134710926110271 +Relative difference = 2.1036162350152416e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.865233e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.915227e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.915227e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 5.734356 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.306069e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.360222e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.360222e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.737139 sec INFO: No Floating Point Exceptions have been reported - 16,430,057,220 cycles # 2.863 GHz - 42,484,854,801 instructions # 2.59 insn per cycle - 5.739849036 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 711) (avx2: 0) (512y: 0) (512z: 0) + 16,137,200,364 cycles:u # 3.397 GHz (74.92%) + 9,389,080 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.91%) + 3,553,701,667 stalled-cycles-backend:u # 22.02% backend cycles idle (74.98%) + 41,552,302,338 instructions:u # 2.57 insn per cycle + # 0.09 stalled cycles per insn (75.06%) + 4.754746528 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.235376e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.401567e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.401567e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
3.353450 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.027570e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.213591e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.213591e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.790838 sec INFO: No Floating Point Exceptions have been reported - 9,612,345,009 cycles # 2.863 GHz - 26,317,248,003 instructions # 2.74 insn per cycle - 3.358813940 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2388) (avx2: 0) (512y: 0) (512z: 0) + 9,377,032,948 cycles:u # 3.344 GHz (74.92%) + 9,531,206 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.91%) + 1,834,437,945 stalled-cycles-backend:u # 19.56% backend cycles idle (74.91%) + 26,195,629,639 instructions:u # 2.79 insn per cycle + # 0.07 stalled cycles per insn (74.91%) + 2.808195918 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2294) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.244474e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.678972e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.678972e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
2.111219 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.374241e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.917522e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.917522e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.612540 sec INFO: No Floating Point Exceptions have been reported - 5,673,148,574 cycles # 2.682 GHz - 12,029,125,150 instructions # 2.12 insn per cycle - 2.116589548 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2532) (512y: 0) (512z: 0) + 5,293,962,555 cycles:u # 3.254 GHz (74.97%) + 7,444,911 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.92%) + 495,611,947 stalled-cycles-backend:u # 9.36% backend cycles idle (74.94%) + 12,323,263,372 instructions:u # 2.33 insn per cycle + # 0.04 stalled cycles per insn (74.94%) + 1.640715925 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2548) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.759844e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.282682e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.282682e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 1.934603 sec -INFO: No Floating Point Exceptions have been reported - 5,185,525,755 cycles # 2.675 GHz - 11,158,849,555 instructions # 2.15 insn per cycle - 1.940086470 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2195) (512y: 148) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.492671e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.676216e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.676216e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.114794 sec -INFO: No Floating Point Exceptions have been reported - 5,530,850,143 cycles # 1.773 GHz - 8,071,834,418 instructions # 1.46 insn per cycle - 3.120392658 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1471) (512y: 129) (512z: 1684) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 746b04ecac..84e30da395 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:20:58 +DATE: 2024-05-16_16:48:14 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.594523e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092654e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.188255e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.533303 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.514011e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.380921e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.420383e+07 ) sec^-1 +MeanMatrixElemValue = ( 
7.088120e+00 +- 1.629041e-01 ) GeV^0 +TOTAL : 0.386498 sec INFO: No Floating Point Exceptions have been reported - 2,159,610,833 cycles # 2.816 GHz - 3,095,961,302 instructions # 1.43 insn per cycle - 0.825364511 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 931,603,369 cycles:u # 2.249 GHz (74.32%) + 2,257,954 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.41%) + 5,850,332 stalled-cycles-backend:u # 0.63% backend cycles idle (74.41%) + 1,489,222,217 instructions:u # 1.60 insn per cycle + # 0.00 stalled cycles per insn (74.92%) + 0.442258532 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110280 -Relative difference = 2.1036162329561614e-07 +Avg ME (F77/GPU) = 4.3134710926110271 +Relative difference = 2.1036162350152416e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.884407e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.935333e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.935333e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 5.676327 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.244015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.296973e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.296973e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.865774 sec INFO: No Floating Point Exceptions have been reported - 16,262,813,557 cycles # 2.863 GHz - 43,266,807,177 instructions # 2.66 insn per cycle - 5.681729392 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 662) (avx2: 0) (512y: 0) (512z: 0) + 15,938,569,834 cycles:u # 3.266 GHz (74.93%) + 10,443,496 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.92%) + 28,554,178 stalled-cycles-backend:u # 0.18% backend cycles idle (74.97%) + 42,358,216,399 instructions:u # 2.66 insn per cycle + # 0.00 stalled cycles per insn (75.05%) + 4.884255117 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.290556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.463505e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.463505e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
3.298765 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.956447e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.139265e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.139265e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.845800 sec INFO: No Floating Point Exceptions have been reported - 9,454,937,516 cycles # 2.862 GHz - 25,430,832,847 instructions # 2.69 insn per cycle - 3.304226277 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2268) (avx2: 0) (512y: 0) (512z: 0) + 8,988,598,431 cycles:u # 3.142 GHz (74.84%) + 9,462,849 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) + 701,322,943 stalled-cycles-backend:u # 7.80% backend cycles idle (75.07%) + 24,974,362,465 instructions:u # 2.78 insn per cycle + # 0.03 stalled cycles per insn (75.12%) + 2.864819100 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2130) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.695348e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.042916e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.042916e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
2.345615 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.318500e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.728288e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.728288e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.857120 sec INFO: No Floating Point Exceptions have been reported - 6,296,882,273 cycles # 2.679 GHz - 13,638,682,807 instructions # 2.17 insn per cycle - 2.351107442 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2629) (512y: 0) (512z: 0) + 5,877,401,974 cycles:u # 3.139 GHz (74.82%) + 8,272,614 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.80%) + 1,152,017,174 stalled-cycles-backend:u # 19.60% backend cycles idle (74.90%) + 13,570,647,489 instructions:u # 2.31 insn per cycle + # 0.08 stalled cycles per insn (75.11%) + 1.876681707 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2610) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.910957e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.286382e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.286382e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.246822 sec -INFO: No Floating Point Exceptions have been reported - 6,026,491,701 cycles # 2.677 GHz - 12,722,860,113 instructions # 2.11 insn per cycle - 2.252413644 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2146) (512y: 296) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.420299e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.596534e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.596534e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.177504 sec -INFO: No Floating Point Exceptions have been reported - 5,627,100,070 cycles # 1.769 GHz - 8,928,441,764 instructions # 1.59 insn per cycle - 3.183062200 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1357) (512y: 171) (512z: 1777) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index a9079e9716..bbd4bac714 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,228 +1,181 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:21:23 +DATE: 2024-05-16_16:48:29 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.566221e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.504693e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.775023e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.485925 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.439069e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.202389e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.293837e+08 ) sec^-1 +MeanMatrixElemValue = ( 
6.834692e+00 +- 1.463624e-01 ) GeV^0 +TOTAL : 0.326772 sec INFO: No Floating Point Exceptions have been reported - 2,003,287,538 cycles # 2.816 GHz - 2,880,414,118 instructions # 1.44 insn per cycle - 0.769648039 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 780,945,559 cycles:u # 2.231 GHz (73.89%) + 2,133,802 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.05%) + 5,808,164 stalled-cycles-backend:u # 0.74% backend cycles idle (75.53%) + 1,289,335,778 instructions:u # 1.65 insn per cycle + # 0.00 stalled cycles per insn (76.77%) + 0.377797926 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313490e+00 -Avg ME (F77/GPU) = 4.3136695463908836 -Relative difference = 4.162439020000051e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 4.313524e+00 +Avg ME (F77/GPU) = 4.3135525460820645 +Relative difference = 6.617809954082434e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.938364e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.994818e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.994818e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 5.499884 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.707985e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.781497e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.781497e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 4.025408 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 15,743,516,639 cycles # 2.861 GHz - 42,225,863,593 instructions # 2.68 insn per cycle - 5.505101290 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 601) (avx2: 0) (512y: 0) (512z: 0) + 13,828,819,171 cycles:u # 3.428 GHz (74.96%) + 17,085,911 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.02%) + 990,078,579 stalled-cycles-backend:u # 7.16% backend cycles idle (75.02%) + 40,682,624,063 instructions:u # 2.94 insn per cycle + # 0.02 stalled cycles per insn (75.02%) + 4.038704610 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 598) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135739049175754 -Relative difference = 2.2042608890083832e-08 +Avg ME (F77/C++) = 4.3135737644042820 +Relative difference = 5.461728906135488e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` 
= 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.494085e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.834702e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.834702e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.423560 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.009062e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.395404e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.395404e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 1.896387 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,948,197,620 cycles # 2.861 GHz - 16,919,710,710 instructions # 2.44 insn per cycle - 2.428887408 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2983) (avx2: 0) (512y: 0) (512z: 0) + 6,434,798,098 cycles:u # 3.377 GHz (74.85%) + 11,977,350 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.83%) + 2,484,080,589 stalled-cycles-backend:u # 38.60% backend cycles idle (74.83%) + 16,319,812,185 instructions:u # 2.54 insn per cycle + # 0.15 stalled cycles per insn (74.95%) + 1.911059116 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2735) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313572e+00 -Avg ME (F77/C++) = 4.3135722205042839 -Relative difference = 5.111872113533787e-08 +Avg ME (C++/C++) = 4.313573e+00 +Avg ME (F77/C++) = 4.3135733148083091 +Relative difference = 7.298086973342306e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.820914e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.816967e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.816967e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.429543 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.169158e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.311982e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.311982e+06 ) sec^-1 +MeanMatrixElemValue = ( 7.289197e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 1.047632 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,855,960,900 cycles # 2.689 GHz - 7,989,689,028 instructions # 2.07 insn per cycle - 1.434693752 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 0) + 3,453,398,445 cycles:u # 3.267 GHz (75.03%) + 9,191,807 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.03%) + 1,188,577,015 stalled-cycles-backend:u # 34.42% backend cycles idle (75.03%) + 8,026,521,272 instructions:u # 2.32 insn per cycle + # 0.15 stalled cycles per insn (75.02%) + 1.060765861 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3309) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645699221641 -Relative difference = 9.97035713074993e-08 +Avg ME (F77/C++) = 4.3135650876211002 +Relative difference = 2.03129199623388e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.282128e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.407558e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.407558e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.355217 sec -INFO: The following Floating Point 
Exceptions have been reported: FE_UNDERFLOW - 3,662,603,190 cycles # 2.693 GHz - 7,491,885,625 instructions # 2.05 insn per cycle - 1.360533114 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3036) (512y: 23) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645699221641 -Relative difference = 9.97035713074993e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 6.072932e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.653576e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.653576e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.816585 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,322,287,385 cycles # 1.825 GHz - 5,988,754,595 instructions # 1.80 insn per cycle - 1.821834164 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2418) (512y: 32) (512z: 2031) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313564e+00 -Avg ME (F77/C++) = 4.3135643783025444 -Relative difference = 8.770069111236825e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 0359df7b77..3c152fc040 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,228 +1,181 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:21:44 +DATE: 2024-05-16_16:48:41 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
+Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.575897e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.505600e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.778243e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.488373 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.028132e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.444735e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.556414e+08 ) sec^-1 +MeanMatrixElemValue = ( 6.834692e+00 +- 1.463624e-01 ) GeV^0 +TOTAL : 0.327398 sec INFO: No Floating Point Exceptions have been reported - 2,007,752,645 cycles # 2.812 GHz - 2,828,437,251 instructions # 1.41 insn per cycle - 0.772837040 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 768,285,604 cycles:u # 2.176 GHz (74.98%) + 2,183,750 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.46%) + 4,903,137 stalled-cycles-backend:u # 0.64% backend cycles idle (75.41%) + 1,359,025,322 instructions:u # 1.77 insn per cycle + # 0.00 stalled cycles per insn (74.33%) + 0.379315567 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313490e+00 -Avg ME (F77/GPU) = 4.3136695463908836 -Relative difference = 4.162439020000051e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 4.313524e+00 +Avg ME (F77/GPU) = 4.3135525460820645 +Relative difference = 6.617809954082434e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.991117e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.050649e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.050649e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 5.356246 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.701216e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.774331e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.774331e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 4.033796 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 15,339,535,429 cycles # 2.862 GHz - 42,474,905,629 instructions # 2.77 insn per cycle - 5.361339903 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 559) (avx2: 0) (512y: 0) (512z: 0) + 13,835,099,929 cycles:u # 3.422 GHz (74.94%) + 17,428,820 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.04%) + 484,816,878 stalled-cycles-backend:u # 3.50% backend cycles idle (75.07%) + 41,359,499,614 instructions:u # 2.99 insn per cycle + # 0.01 stalled cycles per insn (75.07%) + 4.047206130 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 555) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135739491553977 -Relative difference = 1.1787117204016727e-08 +Avg ME (F77/C++) = 4.3135737563716248 +Relative difference = 5.647947044645654e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` 
= 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.134209e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.583662e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.583662e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.132369 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.839951e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.359656e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.359656e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 1.683941 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,119,263,046 cycles # 2.864 GHz - 16,261,701,502 instructions # 2.66 insn per cycle - 2.137647028 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2702) (avx2: 0) (512y: 0) (512z: 0) + 5,586,160,818 cycles:u # 3.299 GHz (74.96%) + 12,340,652 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.96%) + 1,578,714,685 stalled-cycles-backend:u # 28.26% backend cycles idle (74.96%) + 16,066,756,783 instructions:u # 2.88 insn per cycle + # 0.10 stalled cycles per insn (74.97%) + 1.697057412 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2557) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313572e+00 -Avg ME (F77/C++) = 4.3135722205042839 -Relative difference = 5.111872113533787e-08 +Avg ME (C++/C++) = 4.313573e+00 +Avg ME (F77/C++) = 4.3135733148083091 +Relative difference = 7.298086973342306e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.498649e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.173623e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.173623e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.703269 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 8.504699e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.235549e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.235549e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.289198e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 1.383506 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,581,699,390 cycles # 2.683 GHz - 9,041,394,873 instructions # 1.97 insn per cycle - 1.708700782 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3558) (512y: 0) (512z: 0) + 4,638,091,860 cycles:u # 3.330 GHz (74.77%) + 9,866,921 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.75%) + 1,794,856,842 stalled-cycles-backend:u # 38.70% backend cycles idle (74.76%) + 10,093,674,408 instructions:u # 2.18 insn per cycle + # 0.18 stalled cycles per insn (75.02%) + 1.396837524 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3914) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645687580109 -Relative difference = 9.997345323075056e-08 +Avg ME (F77/C++) = 4.3135650896001607 +Relative difference = 2.0771719231865954e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.705142e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.424759e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.424759e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.652652 sec -INFO: The following Floating Point 
Exceptions have been reported: FE_UNDERFLOW - 4,411,023,052 cycles # 2.662 GHz - 8,532,140,610 instructions # 1.93 insn per cycle - 1.658018216 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3311) (512y: 10) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645687580109 -Relative difference = 9.997345323075056e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 6.118773e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.709641e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.709641e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.803301 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,302,699,675 cycles # 1.827 GHz - 5,958,419,273 instructions # 1.80 insn per cycle - 1.808538430 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 32) (512z: 2014) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313564e+00 -Avg ME (F77/C++) = 4.3135643783025444 -Relative difference = 8.770069111236825e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 4345b3c851..e6690e6865 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,102 +1,105 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:22:04 +DATE: 2024-05-16_16:48:53 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
+Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.596790e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.087710e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.182609e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.531791 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.584227e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.454852e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497116e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 +TOTAL : 0.381980 sec INFO: No Floating Point Exceptions have been reported - 2,158,479,665 cycles # 2.816 GHz - 3,115,947,911 instructions # 1.44 insn per cycle - 0.824595914 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 918,668,026 cycles:u # 2.236 GHz (75.67%) + 2,058,167 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.45%) + 5,134,418 stalled-cycles-backend:u # 0.56% backend cycles idle (74.72%) + 1,480,506,140 instructions:u # 1.61 insn per cycle + # 0.00 stalled cycles per insn (74.72%) + 0.439181912 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
1.739846e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.783362e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.783362e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.139062 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.327536e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.383060e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.383060e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.696016 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 17,579,172,412 cycles # 2.862 GHz - 41,767,715,738 instructions # 2.38 insn per cycle - 6.144566394 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) + 16,143,011,940 cycles:u # 3.427 GHz (75.03%) + 38,177,717 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.04%) + 2,321,395,423 stalled-cycles-backend:u # 14.38% backend cycles idle (75.04%) + 41,577,988,005 instructions:u # 2.58 insn per cycle + # 0.06 stalled cycles per insn (75.04%) + 4.714763271 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 635) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -104,29 +107,32 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.944235e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.080846e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.080846e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.674671 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.199625e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.399809e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.399809e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.686359 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 10,157,870,701 cycles # 2.761 GHz - 26,355,211,403 instructions # 2.59 insn per cycle - 3.680088821 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2438) (avx2: 0) (512y: 0) (512z: 0) + 9,038,108,841 cycles:u # 3.346 GHz (74.87%) + 14,730,044 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.83%) + 1,762,459,713 stalled-cycles-backend:u # 19.50% backend cycles idle (74.93%) + 25,979,060,225 instructions:u # 2.87 insn per cycle + # 0.07 stalled cycles per insn (75.08%) + 2.704895398 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2393) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -134,29 +140,32 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.512494e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.830362e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.830362e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.435332 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.420629e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.970166e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.970166e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.603532 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,512,604,303 cycles # 2.669 GHz - 12,120,159,732 instructions # 1.86 insn per cycle - 2.440902409 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2718) (512y: 0) (512z: 0) + 5,323,273,363 cycles:u # 3.290 GHz (74.83%) + 18,468,340 stalled-cycles-frontend:u # 0.35% frontend cycles idle (74.78%) + 1,325,902,509 stalled-cycles-backend:u # 24.91% backend cycles idle (74.79%) + 12,235,378,310 instructions:u # 2.30 insn per cycle + # 0.11 stalled cycles per insn (75.00%) + 1.621743520 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2788) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -164,65 +173,9 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.920988e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.300442e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.300442e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.244169 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,018,583,564 cycles # 
2.676 GHz - 11,228,279,694 instructions # 1.87 insn per cycle - 2.249711111 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2369) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.148571e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.297302e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.297302e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.442171 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,072,730,798 cycles # 1.762 GHz - 8,215,005,190 instructions # 1.35 insn per cycle - 3.447734816 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1787) (512y: 134) (512z: 1755) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index fc67fec042..d361db6bfb 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,102 +1,105 @@ 
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_15:22:31 +DATE: 2024-05-16_16:49:08 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
+Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.615689e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.096145e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.193163e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.527662 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.538104e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.390574e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.430013e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 +TOTAL : 0.379939 sec INFO: No Floating Point Exceptions have been reported - 2,187,091,067 cycles # 2.822 GHz - 3,143,599,790 instructions # 1.44 insn per cycle - 0.831715891 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 915,401,628 cycles:u # 2.244 GHz (74.99%) + 2,223,644 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.39%) + 5,072,697 stalled-cycles-backend:u # 0.55% backend cycles idle (74.53%) + 1,494,510,160 instructions:u # 1.63 insn per cycle + # 0.00 stalled cycles per insn (74.53%) + 0.438042335 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
1.750132e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.794255e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.794255e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.103500 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.291427e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.346384e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.346384e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.765733 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 17,473,867,626 cycles # 2.861 GHz - 43,052,630,037 instructions # 2.46 insn per cycle - 6.108967949 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 651) (avx2: 0) (512y: 0) (512z: 0) + 16,235,324,060 cycles:u # 3.396 GHz (74.90%) + 33,413,921 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.96%) + 65,906,967 stalled-cycles-backend:u # 0.41% backend cycles idle (75.04%) + 42,579,881,927 instructions:u # 2.62 insn per cycle + # 0.00 stalled cycles per insn (75.07%) + 4.784072749 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 577) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -104,29 +107,32 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.176372e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.336517e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.336517e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.414423 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.139701e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.335093e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.335093e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.723584 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,783,940,024 cycles # 2.862 GHz - 25,167,910,576 instructions # 2.57 insn per cycle - 3.420037518 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2276) (avx2: 0) (512y: 0) (512z: 0) + 9,025,902,683 cycles:u # 3.296 GHz (74.94%) + 15,519,832 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.89%) + 1,026,034,829 stalled-cycles-backend:u # 11.37% backend cycles idle (74.88%) + 24,993,447,379 instructions:u # 2.77 insn per cycle + # 0.04 stalled cycles per insn (74.89%) + 2.741735540 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2226) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -134,29 +140,32 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.178030e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.451835e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.451835e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.622185 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.252836e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.647922e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.647922e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.867792 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,019,924,583 cycles # 2.672 GHz - 12,790,606,448 instructions # 1.82 insn per cycle - 2.627804246 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2699) (512y: 0) (512z: 0) + 6,114,690,879 cycles:u # 3.248 GHz (74.93%) + 18,332,899 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.93%) + 1,563,155,302 stalled-cycles-backend:u # 25.56% backend cycles idle (74.94%) + 13,635,238,163 instructions:u # 2.23 insn per cycle + # 0.11 stalled cycles per insn (74.94%) + 1.885955565 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2930) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -164,65 +173,9 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.488078e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.801083e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.801083e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.447720 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,546,937,322 cycles # 
2.670 GHz - 12,109,881,739 instructions # 1.85 insn per cycle - 2.453162643 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2351) (512y: 227) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.983756e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.117708e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.117708e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.627336 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,377,996,877 cycles # 1.756 GHz - 8,984,744,450 instructions # 1.41 insn per cycle - 3.632964633 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1892) (512y: 178) (512z: 2083) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index f2a95b68c4..c083e9c540 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:19:32 +DATE: 2024-05-16_16:47:20 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.205899e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.229515e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.233614e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.467479 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.249237e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.105605e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.108817e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.397188 sec INFO: No Floating Point Exceptions have been reported - 1,929,394,895 cycles # 2.809 GHz - 2,774,653,842 instructions # 1.44 insn per cycle - 0.745241861 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,004,692,712 cycles:u # 2.519 GHz (72.93%) + 2,262,405 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.51%) + 5,225,315 stalled-cycles-backend:u # 0.52% backend cycles idle (75.85%) + 1,406,860,225 instructions:u # 1.40 insn per cycle + # 0.00 stalled cycles per insn (75.73%) + 0.445011634 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.854750e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.994181e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.003911e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.485365 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.670202e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.869175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.870322e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.404451 sec INFO: No Floating Point Exceptions have been reported - 1,990,830,698 cycles # 2.816 GHz - 2,942,277,354 instructions # 1.48 insn per cycle - 0.765598417 seconds time elapsed + 1,119,277,176 cycles:u # 2.635 GHz (74.63%) + 2,281,293 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.41%) + 5,778,701 stalled-cycles-backend:u # 0.52% backend cycles idle (75.54%) + 1,501,239,436 instructions:u # 1.34 insn per cycle + # 0.00 stalled cycles per insn (75.54%) + 0.448625365 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176604E-006 -Relative difference = 3.3392753366481633e-07 +Avg ME (F77/GPU) = 8.1274562860176587E-006 +Relative difference = 3.3392753387325367e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.339413e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.342602e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.342602e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.163415 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.033110e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.036614e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.036614e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.136579 sec INFO: No Floating Point Exceptions have been reported - 474,956,853 cycles # 2.847 GHz - 1,396,923,375 instructions # 2.94 insn per cycle - 0.167372542 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3991) (avx2: 0) (512y: 0) (512z: 0) + 469,654,777 cycles:u # 3.361 GHz (71.58%) + 207,304 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.39%) + 65,129,868 stalled-cycles-backend:u # 13.87% backend cycles idle (77.07%) + 1,392,067,783 instructions:u # 2.96 insn per cycle + # 0.05 stalled cycles per insn (77.12%) + 0.142722037 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1926) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860167185E-006 -Relative difference = 3.339276495559746e-07 +Avg ME (F77/C++) = 8.1274562860167168E-006 +Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.350685e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.362490e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.362490e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.088282 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.240664e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.257925e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.257925e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.062711 sec INFO: No Floating Point Exceptions have been reported - 246,129,842 cycles # 2.680 GHz - 699,160,574 instructions # 2.84 insn per cycle - 0.092454839 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9501) (avx2: 0) (512y: 0) (512z: 0) + 223,798,642 cycles:u # 3.393 GHz (73.30%) + 75,139 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.96%) + 21,166,758 stalled-cycles-backend:u # 9.46% backend cycles idle (75.76%) + 660,287,200 instructions:u # 2.95 insn per cycle + # 0.03 stalled cycles per insn (75.76%) + 0.069260875 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9270) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.421076e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.426847e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.426847e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.042262 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.075126e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.084453e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.084453e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.031311 sec INFO: No Floating Point Exceptions have been reported - 120,513,094 cycles # 2.641 GHz - 260,079,134 instructions # 2.16 insn per cycle - 0.046206481 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8227) (512y: 0) (512z: 0) + 103,447,525 cycles:u # 2.943 GHz (73.59%) + 79,219 stalled-cycles-frontend:u # 0.08% frontend cycles idle (77.29%) + 11,522,827 stalled-cycles-backend:u # 11.14% backend cycles idle (77.29%) + 230,968,164 instructions:u # 2.23 insn per cycle + # 0.05 stalled cycles per insn (77.28%) + 0.039268152 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8240) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.614262e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.622122e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.622122e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037855 sec -INFO: No Floating Point Exceptions have been reported - 109,022,775 cycles # 2.645 GHz - 240,308,972 instructions # 2.20 insn per cycle - 0.041904895 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 
7348) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.170349e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.175260e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.175260e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.050252 sec -INFO: No Floating Point Exceptions have been reported - 96,595,554 cycles # 
1.802 GHz - 138,452,128 instructions # 1.43 insn per cycle - 0.054148545 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1692) (512y: 126) (512z: 6592) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index ca894b0a6d..6670b6deae 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:19:42 +DATE: 2024-05-16_16:47:27 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.237277e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.263102e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.267367e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.467317 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.524444e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.326578e+03 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 9.329845e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.375805 sec INFO: No Floating Point Exceptions have been reported - 1,933,877,717 cycles # 2.813 GHz - 2,829,779,417 instructions # 1.46 insn per cycle - 0.746133258 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 988,836,868 cycles:u # 2.489 GHz (74.90%) + 2,168,415 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.86%) + 5,335,512 stalled-cycles-backend:u # 0.54% backend cycles idle (75.86%) + 1,446,352,813 instructions:u # 1.46 insn per cycle + # 0.00 stalled cycles per insn (74.48%) + 0.424375867 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.945887e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.087010e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.096853e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.483889 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.657513e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.864111e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.864991e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.402087 sec INFO: No Floating Point Exceptions have been reported - 2,005,783,112 cycles # 2.816 GHz - 2,927,359,248 instructions # 1.46 insn per cycle - 0.768925329 seconds time elapsed + 1,123,109,105 cycles:u # 2.644 GHz (73.81%) + 2,150,436 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.58%) + 5,271,012 stalled-cycles-backend:u # 0.47% backend cycles idle (75.42%) + 1,495,903,512 instructions:u # 1.33 insn per cycle + # 0.00 stalled cycles per insn (75.54%) + 0.448148581 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176604E-006 -Relative difference = 3.3392753366481633e-07 +Avg ME (F77/GPU) = 8.1274562860176587E-006 +Relative difference = 3.3392753387325367e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.344408e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.347652e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.347652e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.162339 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.078771e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.082315e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.082315e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.134582 sec INFO: No Floating Point Exceptions have been reported - 471,806,818 cycles # 2.848 GHz - 1,391,948,601 instructions # 2.95 insn per cycle - 0.166295977 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3869) (avx2: 0) (512y: 0) (512z: 0) + 471,983,208 cycles:u # 3.420 GHz (71.54%) + 148,000 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.37%) + 64,228,017 stalled-cycles-backend:u # 13.61% backend cycles idle (76.82%) + 1,385,303,937 instructions:u # 2.94 insn per cycle + # 0.05 stalled cycles per insn (76.82%) + 0.141959219 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1902) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860167185E-006 -Relative difference = 3.339276495559746e-07 +Avg ME (F77/C++) = 8.1274562860167168E-006 +Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.367799e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.379601e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.379601e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.087176 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.158759e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.175984e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.175984e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.062631 sec INFO: No Floating Point Exceptions have been reported - 243,999,829 cycles # 2.694 GHz - 695,186,413 instructions # 2.85 insn per cycle - 0.091139423 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9537) (avx2: 0) (512y: 0) (512z: 0) + 200,577,982 cycles:u # 3.034 GHz (76.01%) + 82,947 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.82%) + 27,068,799 stalled-cycles-backend:u # 13.50% backend cycles idle (75.82%) + 657,741,134 instructions:u # 3.28 insn per cycle + # 0.04 stalled cycles per insn (75.82%) + 0.069339406 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9325) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.395387e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.400899e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.400899e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.042187 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.082004e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.090663e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.090663e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.030350 sec INFO: No Floating Point Exceptions have been reported - 119,801,052 cycles # 2.624 GHz - 255,741,591 instructions # 2.13 insn per cycle - 0.046174431 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8181) (512y: 0) (512z: 0) + 105,410,004 cycles:u # 3.128 GHz (73.38%) + 64,571 stalled-cycles-frontend:u # 0.06% frontend cycles idle (76.34%) + 12,580,128 stalled-cycles-backend:u # 11.93% backend cycles idle (76.29%) + 227,937,467 instructions:u # 2.16 insn per cycle + # 0.06 stalled cycles per insn (76.30%) + 0.037102587 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8196) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.613988e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.621406e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.621406e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037041 sec -INFO: No Floating Point Exceptions have been reported - 106,534,081 cycles # 2.639 GHz - 235,917,118 instructions # 2.21 insn per cycle - 0.041041363 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 
7301) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.167962e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.172897e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.172897e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.049548 sec -INFO: No Floating Point Exceptions have been reported - 94,554,513 cycles # 
1.786 GHz - 133,899,064 instructions # 1.42 insn per cycle - 0.053428613 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1641) (512y: 126) (512z: 6597) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index f86e27869e..a073daefbe 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:19:53 +DATE: 2024-05-16_16:47:34 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.541598e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.553658e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.556693e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 -TOTAL : 0.467629 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.580358e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.790988e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.792117e+04 ) sec^-1 +MeanMatrixElemValue = ( 3.100300e-04 +- 2.256635e-04 ) GeV^-4 +TOTAL : 0.331774 sec INFO: No Floating Point Exceptions have been reported - 1,964,166,954 cycles # 2.815 GHz - 2,823,406,286 instructions # 1.44 insn per cycle - 0.754117473 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 845,933,702 cycles:u # 2.396 GHz (74.12%) + 2,215,776 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.36%) + 5,136,072 stalled-cycles-backend:u # 0.61% backend cycles idle (74.36%) + 1,366,289,325 instructions:u # 1.62 insn per cycle + # 0.00 stalled cycles per insn (75.11%) + 0.377329106 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.614317e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.731134e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.742615e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.020493e-03 +- 4.025604e-03 ) GeV^-4 -TOTAL : 0.468434 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.082122e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.036129e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.039928e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.017784e-02 +- 5.681015e-02 ) GeV^-4 +TOTAL : 0.343540 sec INFO: No Floating Point Exceptions have been reported - 1,946,164,211 cycles # 2.817 GHz - 2,847,399,547 instructions # 1.46 insn per cycle - 0.748191861 seconds time elapsed + 891,904,831 cycles:u # 2.445 GHz (74.08%) + 2,269,831 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.38%) + 5,457,551 stalled-cycles-backend:u # 0.61% backend cycles idle (75.71%) + 1,334,940,313 instructions:u # 1.50 insn per cycle + # 0.00 stalled cycles per insn (75.81%) + 0.388639407 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127250e-06 -Avg ME (F77/GPU) = 8.1272870954487585E-006 -Relative difference = 4.564329725014175e-06 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 8.127320e-06 +Avg ME (F77/GPU) = 8.1275379236374627E-006 +Relative difference = 2.681371441780168e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.448019e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.451516e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.451516e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.158474 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.470121e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.474326e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.474326e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 +TOTAL : 0.123780 sec INFO: No Floating Point Exceptions have been reported - 461,638,972 cycles # 2.852 GHz - 1,393,493,000 instructions # 3.02 insn per cycle - 0.162490485 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3070) (avx2: 0) (512y: 0) (512z: 0) + 413,295,293 cycles:u # 3.248 GHz (74.93%) + 89,291 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.86%) + 45,921,697 stalled-cycles-backend:u # 11.11% backend cycles idle (74.86%) + 1,294,421,183 instructions:u # 3.13 insn per cycle + # 0.04 stalled cycles per insn (74.86%) + 0.130474069 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127811e-06 -Avg ME (F77/C++) = 8.1278105211728276E-006 -Relative difference = 5.891219330978222e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127810e-06 +Avg ME (F77/C++) = 8.1278100323291073E-006 +Relative difference = 3.977591502689147e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.201120e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.205395e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.205395e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.048902 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.733679e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.739879e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.739879e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 +TOTAL : 0.036021 sec INFO: No Floating Point Exceptions have been reported - 138,099,810 cycles # 2.644 GHz - 375,723,801 instructions # 2.72 insn per cycle - 0.052805368 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:10134) (avx2: 0) (512y: 0) (512z: 0) + 113,806,570 cycles:u # 2.899 GHz (75.21%) + 81,457 stalled-cycles-frontend:u # 0.07% frontend cycles idle (79.65%) + 17,796,008 stalled-cycles-backend:u # 15.64% backend cycles idle (79.65%) + 342,773,119 instructions:u # 3.01 insn per cycle + # 0.05 stalled cycles per insn (79.65%) + 0.042511695 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9799) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127809e-06 -Avg ME (F77/C++) = 8.1278090510674588E-006 -Relative difference = 6.2830535070193674e-09 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127807e-06 +Avg ME (F77/C++) = 8.1278071680283782E-006 +Relative difference = 2.0673273707686565e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] 
(23) = ( 2.699468e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.721720e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.721720e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.024500 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.069327e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.103583e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.103583e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.018562 sec INFO: No Floating Point Exceptions have been reported - 72,431,086 cycles # 2.595 GHz - 146,734,646 instructions # 2.03 insn per cycle - 0.028413255 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8933) (512y: 0) (512z: 0) + 59,391,676 cycles:u # 2.673 GHz (73.35%) + 65,296 stalled-cycles-frontend:u # 0.11% frontend cycles idle (65.42%) + 4,060,421 stalled-cycles-backend:u # 6.84% backend cycles idle (64.09%) + 105,349,453 instructions:u # 1.77 insn per cycle + # 0.04 stalled cycles per insn (64.02%) + 0.025321924 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8980) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127535e-06 +Avg ME (F77/C++) = 8.1275352476332691E-006 +Relative difference = 3.04684346075092e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.950281e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.979563e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.979563e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.023199 sec -INFO: No Floating Point Exceptions have been reported - 67,511,576 cycles # 2.517 GHz - 136,466,222 
instructions # 2.02 insn per cycle - 0.027372188 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8164) (512y: 28) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.260359e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.280493e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.280493e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165747e-04 +- 
6.542824e-04 ) GeV^-4 -TOTAL : 0.028479 sec -INFO: No Floating Point Exceptions have been reported - 59,124,236 cycles # 1.860 GHz - 85,286,285 instructions # 1.44 insn per cycle - 0.032355670 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2572) (512y: 32) (512z: 6935) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275369863475849E-006 -Relative difference = 1.6797726498700304e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 2af7dd76f9..21217c8e82 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx 
+BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:20:03 +DATE: 2024-05-16_16:47:40 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.561126e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.572400e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.575387e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 -TOTAL : 0.469592 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.485272e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.698246e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.699297e+04 ) sec^-1 +MeanMatrixElemValue = ( 3.100300e-04 +- 2.256635e-04 ) GeV^-4 +TOTAL : 0.332618 sec INFO: No Floating Point Exceptions have been reported - 1,933,901,131 cycles # 2.816 GHz - 2,803,636,036 instructions # 1.45 insn per cycle - 0.744726293 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 852,682,179 cycles:u # 2.412 GHz (74.20%) + 2,114,869 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.32%) + 5,227,861 stalled-cycles-backend:u # 0.61% backend cycles idle (76.09%) + 1,357,000,612 instructions:u # 1.59 insn per cycle + # 0.00 stalled cycles per insn (75.91%) + 0.380309350 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.901730e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.003706e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005157e+06 ) sec^-1 -MeanMatrixElemValue = ( 8.020495e-03 +- 4.025606e-03 ) GeV^-4 -TOTAL : 0.471774 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.040010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.913259e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.916790e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.017784e-02 +- 5.681015e-02 ) GeV^-4 +TOTAL : 0.343910 sec INFO: No Floating Point Exceptions have been reported - 1,934,886,385 cycles # 2.815 GHz - 2,830,776,229 instructions # 1.46 insn per cycle - 0.746474254 seconds time elapsed + 902,430,277 cycles:u # 2.474 GHz (73.57%) + 2,175,860 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.42%) + 5,109,947 stalled-cycles-backend:u # 0.57% backend cycles idle (75.75%) + 1,338,775,832 instructions:u # 1.48 insn per cycle + # 0.00 stalled cycles per insn (75.92%) + 0.387743482 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127250e-06 -Avg ME (F77/GPU) = 8.1272870252982758E-006 -Relative difference = 4.555698209723637e-06 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 8.127320e-06 +Avg ME (F77/GPU) = 8.1275379236391975E-006 +Relative difference = 2.681371463124516e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.452227e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.455705e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.455705e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.157329 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.491799e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.496021e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.496021e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 +TOTAL : 0.122598 sec INFO: No Floating Point Exceptions have been reported - 458,573,657 cycles # 2.854 GHz - 1,388,574,447 instructions # 3.03 insn per cycle - 0.161242660 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2959) (avx2: 0) (512y: 0) (512z: 0) + 412,510,663 cycles:u # 3.266 GHz (74.78%) + 88,243 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.68%) + 48,686,355 stalled-cycles-backend:u # 11.80% backend cycles idle (74.68%) + 1,288,114,719 instructions:u # 3.12 insn per cycle + # 0.04 stalled cycles per insn (74.68%) + 0.129800445 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127811e-06 -Avg ME (F77/C++) = 8.1278105211728276E-006 -Relative difference = 5.891219330978222e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127810e-06 +Avg ME (F77/C++) = 8.1278100323291073E-006 +Relative difference = 3.977591502689147e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.204538e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.208976e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.208976e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.047932 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.658569e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.664875e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.664875e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 +TOTAL : 0.037202 sec INFO: No Floating Point Exceptions have been reported - 136,097,535 cycles # 2.652 GHz - 371,027,952 instructions # 2.73 insn per cycle - 0.051946079 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:10117) (avx2: 0) (512y: 0) (512z: 0) + 123,231,500 cycles:u # 3.020 GHz (64.45%) + 32,397 stalled-cycles-frontend:u # 0.03% frontend cycles idle (73.20%) + 17,917,241 stalled-cycles-backend:u # 14.54% backend cycles idle (80.43%) + 338,960,280 instructions:u # 2.75 insn per cycle + # 0.05 stalled cycles per insn (80.43%) + 0.045406964 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9782) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127809e-06 -Avg ME (F77/C++) = 8.1278090510674588E-006 -Relative difference = 6.2830535070193674e-09 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127807e-06 +Avg ME (F77/C++) = 8.1278071680283782E-006 +Relative difference = 2.0673273707686565e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] 
(23) = ( 2.559391e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.580217e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.580217e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.024960 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.068604e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.102973e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.102973e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.017922 sec INFO: No Floating Point Exceptions have been reported - 71,167,021 cycles # 2.517 GHz - 142,031,155 instructions # 2.00 insn per cycle - 0.028974311 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8887) (512y: 0) (512z: 0) + 37,375,206 cycles:u # 1.762 GHz (62.38%) + 76,397 stalled-cycles-frontend:u # 0.20% frontend cycles idle (62.38%) + 7,317,942 stalled-cycles-backend:u # 19.58% backend cycles idle (62.25%) + 163,822,146 instructions:u # 4.38 insn per cycle + # 0.04 stalled cycles per insn (75.96%) + 0.024185473 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8934) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127535e-06 +Avg ME (F77/C++) = 8.1275352476332691E-006 +Relative difference = 3.04684346075092e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.102195e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.131341e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.131341e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.021142 sec -INFO: No Floating Point Exceptions have been reported - 63,906,261 cycles # 2.611 GHz - 131,729,034 
instructions # 2.06 insn per cycle - 0.025029577 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8117) (512y: 28) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.321655e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.342179e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.342179e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165747e-04 +- 
6.542824e-04 ) GeV^-4 -TOTAL : 0.027241 sec -INFO: No Floating Point Exceptions have been reported - 57,621,926 cycles # 1.879 GHz - 80,488,160 instructions # 1.40 insn per cycle - 0.031258526 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2521) (512y: 32) (512z: 6939) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275369863475849E-006 -Relative difference = 1.6797726498700304e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 16ac12981a..67d4ea5be4 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx 
+BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:20:13 +DATE: 2024-05-16_16:47:47 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.172533e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.195464e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.199217e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.467059 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.973967e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.839341e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.842392e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.389170 sec INFO: No Floating Point Exceptions have been reported - 1,929,783,722 cycles # 2.812 GHz - 2,830,067,082 instructions # 1.47 insn per cycle - 0.744348567 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,049,671,631 cycles:u # 2.595 GHz (73.34%) + 2,145,491 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.13%) + 5,338,589 stalled-cycles-backend:u # 0.51% backend cycles idle (74.49%) + 1,484,554,929 instructions:u # 1.41 insn per cycle + # 0.00 stalled cycles per insn (75.03%) + 0.435834746 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.817494e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.954472e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.963776e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.485210 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.645400e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.858752e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.859708e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.407362 sec INFO: No Floating Point Exceptions have been reported - 1,989,265,248 cycles # 2.816 GHz - 2,972,405,087 instructions # 1.49 insn per cycle - 0.764721680 seconds time elapsed + 1,054,455,188 cycles:u # 2.486 GHz (75.29%) + 2,146,967 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.61%) + 5,177,929 stalled-cycles-backend:u # 0.49% backend cycles idle (75.98%) + 1,493,526,168 instructions:u # 1.42 insn per cycle + # 0.00 stalled cycles per insn (75.76%) + 0.451964984 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405200E-006 -Relative difference = 3.3369094561706885e-07 +Avg ME (F77/GPU) = 8.1274562879405183E-006 +Relative difference = 3.336909458255062e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.312127e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.315249e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.315249e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.164695 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.970330e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.973645e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.973645e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.138543 sec INFO: No Floating Point Exceptions have been reported - 479,517,658 cycles # 2.854 GHz - 1,405,303,424 instructions # 2.93 insn per cycle - 0.168655160 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3977) (avx2: 0) (512y: 0) (512z: 0) + 464,427,899 cycles:u # 3.271 GHz (73.13%) + 76,464 stalled-cycles-frontend:u # 0.02% frontend cycles idle (71.92%) + 62,995,016 stalled-cycles-backend:u # 13.56% backend cycles idle (72.25%) + 1,444,762,442 instructions:u # 3.11 insn per cycle + # 0.04 stalled cycles per insn (75.08%) + 0.145335380 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1922) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 
1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.589174e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.601629e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.601629e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.085009 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.063181e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.080166e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.080166e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.064146 sec INFO: No Floating Point Exceptions have been reported - 242,672,694 cycles # 2.748 GHz - 691,102,866 instructions # 2.85 insn per cycle - 0.088915527 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9324) (avx2: 0) (512y: 0) (512z: 0) + 198,464,404 cycles:u # 2.934 GHz (76.37%) + 106,440 stalled-cycles-frontend:u # 0.05% frontend cycles idle (76.37%) + 22,274,758 stalled-cycles-backend:u # 11.22% backend cycles idle (76.37%) + 658,392,705 instructions:u # 3.32 insn per cycle + # 0.03 stalled cycles per insn (76.37%) + 0.071173080 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9115) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.402863e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.409241e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.409241e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.042757 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.100149e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.108981e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.108981e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.030748 sec INFO: No Floating Point Exceptions have been reported - 119,836,607 cycles # 2.596 GHz - 257,882,084 instructions # 2.15 insn per cycle - 0.046733316 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8244) (512y: 0) (512z: 0) + 106,500,188 cycles:u # 3.126 GHz (72.86%) + 75,830 stalled-cycles-frontend:u # 0.07% frontend cycles idle (76.61%) + 11,125,913 stalled-cycles-backend:u # 10.45% backend cycles idle (76.56%) + 229,525,028 instructions:u # 2.16 insn per cycle + # 0.05 stalled cycles per insn (76.56%) + 0.037202441 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8195) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.611690e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.620124e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.620124e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037856 sec -INFO: No Floating Point Exceptions have been reported - 108,462,768 cycles # 2.631 GHz - 238,127,423 instructions # 2.20 insn per cycle - 0.041890123 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 
7342) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.150674e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155466e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.155466e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.051477 sec -INFO: No Floating Point Exceptions have been reported - 99,538,839 cycles # 
1.810 GHz - 139,339,349 instructions # 1.40 insn per cycle - 0.055665824 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1953) (512y: 122) (512z: 6323) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 96180e8a09..7c7adbf4e6 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,233 +1,193 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_15:20:23 +DATE: 2024-05-16_16:47:53 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.207087e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.230616e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.234507e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.468179 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.329669e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.100366e+03 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 9.104247e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.414191 sec INFO: No Floating Point Exceptions have been reported - 1,938,727,271 cycles # 2.813 GHz - 2,835,562,501 instructions # 1.46 insn per cycle - 0.747262841 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 966,999,156 cycles:u # 2.339 GHz (74.01%) + 2,212,971 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.43%) + 5,017,574 stalled-cycles-backend:u # 0.52% backend cycles idle (74.75%) + 1,467,686,990 instructions:u # 1.52 insn per cycle + # 0.00 stalled cycles per insn (75.63%) + 0.460852954 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.924846e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.065621e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.075056e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.482793 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.666830e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.874292e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.875374e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.428185 sec INFO: No Floating Point Exceptions have been reported - 2,011,507,022 cycles # 2.818 GHz - 2,962,288,052 instructions # 1.47 insn per cycle - 0.770325801 seconds time elapsed + 1,078,414,057 cycles:u # 2.408 GHz (74.54%) + 2,243,483 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.90%) + 5,240,014 stalled-cycles-backend:u # 0.49% backend cycles idle (76.16%) + 1,523,411,562 instructions:u # 1.41 insn per cycle + # 0.00 stalled cycles per insn (75.37%) + 0.473470367 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405200E-006 -Relative difference = 3.3369094561706885e-07 +Avg ME (F77/GPU) = 8.1274562879405183E-006 +Relative difference = 3.336909458255062e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.325014e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.328184e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.328184e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.163699 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.983715e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.987011e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.987011e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.138234 sec INFO: No Floating Point Exceptions have been reported - 475,740,171 cycles # 2.851 GHz - 1,400,755,519 instructions # 2.94 insn per cycle - 0.167716370 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3871) (avx2: 0) (512y: 0) (512z: 0) + 471,358,578 cycles:u # 3.326 GHz (72.95%) + 286,150 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.79%) + 53,227,929 stalled-cycles-backend:u # 11.29% backend cycles idle (77.43%) + 1,400,682,647 instructions:u # 2.97 insn per cycle + # 0.04 stalled cycles per insn (77.43%) + 0.144832085 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1898) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 
1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.586616e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.599028e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.599028e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.084604 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 9.214837e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.233304e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.233304e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.062301 sec INFO: No Floating Point Exceptions have been reported - 242,310,895 cycles # 2.753 GHz - 687,440,781 instructions # 2.84 insn per cycle - 0.088664129 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9365) (avx2: 0) (512y: 0) (512z: 0) + 222,145,421 cycles:u # 3.362 GHz (73.31%) + 164,932 stalled-cycles-frontend:u # 0.07% frontend cycles idle (76.00%) + 23,024,841 stalled-cycles-backend:u # 10.36% backend cycles idle (75.81%) + 645,618,042 instructions:u # 2.91 insn per cycle + # 0.04 stalled cycles per insn (75.81%) + 0.069645029 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9168) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.421509e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.427219e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.427219e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.041396 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.127492e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.136675e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.136675e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.029807 sec INFO: No Floating Point Exceptions have been reported - 117,633,598 cycles # 2.630 GHz - 253,582,281 instructions # 2.16 insn per cycle - 0.045344869 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8196) (512y: 0) (512z: 0) + 101,417,942 cycles:u # 3.046 GHz (74.72%) + 76,113 stalled-cycles-frontend:u # 0.08% frontend cycles idle (76.01%) + 10,755,338 stalled-cycles-backend:u # 10.60% backend cycles idle (76.01%) + 224,352,321 instructions:u # 2.21 insn per cycle + # 0.05 stalled cycles per insn (76.01%) + 0.036561379 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8148) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.533249e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.540083e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.540083e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.038782 sec -INFO: No Floating Point Exceptions have been reported - 106,121,372 cycles # 2.518 GHz - 233,883,831 instructions # 2.20 insn per cycle - 0.042791740 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 
7292) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.148151e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.152898e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.152898e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.050273 sec -INFO: No Floating Point Exceptions have been reported - 95,562,086 cycles # 
1.781 GHz - 134,760,547 instructions # 1.41 insn per cycle - 0.054201969 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1904) (512y: 122) (512z: 6323) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 15f8e8659d..b9a455b78d 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:18:21 +DATE: 2024-05-16_16:46:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.830621e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.798641e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.407520e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.518896 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.624010e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.367574e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.801579e+08 ) 
sec^-1 +MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 +TOTAL : 0.355596 sec INFO: No Floating Point Exceptions have been reported - 2,130,015,467 cycles # 2.824 GHz - 3,049,782,764 instructions # 1.43 insn per cycle - 0.811167083 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 795,883,375 cycles:u # 2.164 GHz (73.59%) + 2,199,508 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.16%) + 5,328,268 stalled-cycles-backend:u # 0.67% backend cycles idle (74.73%) + 1,302,983,695 instructions:u # 1.64 insn per cycle + # 0.00 stalled cycles per insn (76.23%) + 0.408744876 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961386341946 -Relative difference = 2.034932117056294e-07 +Avg ME (F77/GPU) = 0.42328961386341935 +Relative difference = 2.0349321196791385e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.652167e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.115593e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.115593e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.200987 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.171586e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.330161e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.330161e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 1.087037 sec INFO: No Floating Point Exceptions have been reported - 3,451,141,340 cycles # 2.863 GHz - 8,714,346,508 instructions # 2.53 insn per cycle - 1.206502072 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 458) (avx2: 0) (512y: 0) (512z: 0) + 3,510,794,127 cycles:u # 3.189 GHz (74.59%) + 8,350,329 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.67%) + 9,535,648 stalled-cycles-backend:u # 0.27% backend cycles idle (75.04%) + 8,527,000,615 instructions:u # 2.43 insn per cycle + # 0.00 stalled cycles per insn (75.29%) + 1.105132836 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 422) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.615216e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.136998e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.136998e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.764589 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.165406e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.818486e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.818486e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.676108 sec INFO: No Floating Point Exceptions have been reported - 2,197,801,743 cycles # 2.856 GHz - 5,465,338,789 instructions # 2.49 insn per cycle - 0.770190206 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1298) (avx2: 0) (512y: 0) (512z: 0) + 2,061,850,364 cycles:u # 2.986 GHz (74.63%) + 9,451,247 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.56%) + 17,131,190 stalled-cycles-backend:u # 0.83% backend cycles idle (74.52%) + 5,339,599,196 instructions:u # 2.59 insn per cycle + # 0.00 stalled cycles per insn (74.92%) + 0.694041955 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.276018e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.408168e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.408168e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.576218 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.515946e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.232968e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.232968e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.490572 sec INFO: No Floating Point Exceptions have been reported - 1,593,709,911 cycles # 2.743 GHz - 3,182,241,147 instructions # 2.00 insn per cycle - 0.581747530 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1459) (512y: 0) (512z: 0) + 1,400,660,046 cycles:u # 2.775 GHz (74.64%) + 8,301,019 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.70%) + 17,277,719 stalled-cycles-backend:u # 1.23% backend cycles idle (74.66%) + 3,131,495,578 instructions:u # 2.24 insn per cycle + # 0.01 stalled cycles per insn (74.64%) + 0.507920964 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1429) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.349428e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.560869e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.560869e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.561533 sec -INFO: No Floating Point Exceptions have been reported - 1,552,006,209 cycles # 2.741 GHz - 3,083,871,547 instructions # 1.99 insn per cycle - 0.567100846 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1274) (512y: 95) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328961386341946 -Relative difference = 2.034932117056294e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.103380e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.012957e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.012957e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.614313 sec -INFO: No Floating Point Exceptions have been reported - 1,344,567,311 cycles # 2.171 GHz - 2,376,857,450 instructions # 1.77 insn per 
cycle - 0.619905839 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 584) (512y: 62) (512z: 953) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328961386341946 -Relative difference = 2.034932117056294e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index 6add239f16..16cae251dc 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:18:33 +DATE: 2024-05-16_16:46:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.948407e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.328423e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.761410e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.519601 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.532002e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.960780e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.579462e+08 ) 
sec^-1 +MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 +TOTAL : 0.341639 sec INFO: No Floating Point Exceptions have been reported - 2,123,926,879 cycles # 2.815 GHz - 2,991,717,095 instructions # 1.41 insn per cycle - 0.811782941 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 744,748,649 cycles:u # 2.053 GHz (75.09%) + 2,024,882 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.84%) + 5,540,587 stalled-cycles-backend:u # 0.74% backend cycles idle (74.84%) + 1,318,423,649 instructions:u # 1.77 insn per cycle + # 0.00 stalled cycles per insn (76.44%) + 0.395137586 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961386341946 -Relative difference = 2.034932117056294e-07 +Avg ME (F77/GPU) = 0.42328961386341935 +Relative difference = 2.0349321196791385e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.686449e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.122021e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.122021e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.196252 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.177846e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.335135e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.335135e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 1.081545 sec INFO: No Floating Point Exceptions have been reported - 3,435,810,217 cycles # 2.862 GHz - 8,629,255,980 instructions # 2.51 insn per cycle - 1.201785163 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 403) (avx2: 0) (512y: 0) (512z: 0) + 3,482,832,137 cycles:u # 3.178 GHz (74.70%) + 8,844,725 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.05%) + 15,499,947 stalled-cycles-backend:u # 0.45% backend cycles idle (75.18%) + 8,525,375,009 instructions:u # 2.45 insn per cycle + # 0.00 stalled cycles per insn (75.18%) + 1.099594807 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 356) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.590372e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.090308e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.090308e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.773787 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.167114e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.819518e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.819518e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.674010 sec INFO: No Floating Point Exceptions have been reported - 2,172,281,754 cycles # 2.790 GHz - 5,399,686,889 instructions # 2.49 insn per cycle - 0.779398624 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1258) (avx2: 0) (512y: 0) (512z: 0) + 2,073,283,760 cycles:u # 3.014 GHz (74.53%) + 9,474,615 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.38%) + 17,215,413 stalled-cycles-backend:u # 0.83% backend cycles idle (74.60%) + 5,261,755,497 instructions:u # 2.54 insn per cycle + # 0.00 stalled cycles per insn (75.22%) + 0.691195150 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1165) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.283822e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.420214e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.420214e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.573489 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.522414e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.235097e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.235097e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.488293 sec INFO: No Floating Point Exceptions have been reported - 1,585,769,603 cycles # 2.741 GHz - 3,149,146,191 instructions # 1.99 insn per cycle - 0.579182812 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1386) (512y: 0) (512z: 0) + 1,394,369,363 cycles:u # 2.775 GHz (74.72%) + 8,267,581 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.58%) + 16,512,177 stalled-cycles-backend:u # 1.18% backend cycles idle (74.59%) + 3,133,423,950 instructions:u # 2.25 insn per cycle + # 0.01 stalled cycles per insn (74.53%) + 0.505600439 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1373) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.354137e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.604902e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.604902e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.559958 sec -INFO: No Floating Point Exceptions have been reported - 1,547,131,577 cycles # 2.739 GHz - 3,062,437,995 instructions # 1.98 insn per cycle - 0.565482274 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1220) (512y: 95) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328961386341946 -Relative difference = 2.034932117056294e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.108481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.023241e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.023241e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.612188 sec -INFO: No Floating Point Exceptions have been reported - 1,354,565,413 cycles # 2.195 GHz - 2,362,076,089 instructions # 1.74 insn per 
cycle - 0.617754113 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 557) (512y: 62) (512z: 944) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328961386341946 -Relative difference = 2.034932117056294e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 35b822f8f6..6b8b4b7ad6 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:18:45 +DATE: 2024-05-16_16:46:51 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.370205e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.202282e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.219119e+09 ) sec^-1 -MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 -TOTAL : 0.481970 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.336288e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.319598e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.720811e+09 ) 
sec^-1 +MeanMatrixElemValue = ( 4.242352e-01 +- 1.228752e-04 ) GeV^0 +TOTAL : 0.316203 sec INFO: No Floating Point Exceptions have been reported - 1,992,725,828 cycles # 2.818 GHz - 2,868,294,521 instructions # 1.44 insn per cycle - 0.764321619 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 72 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 745,531,179 cycles:u # 2.195 GHz (73.82%) + 2,167,811 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.03%) + 5,094,435 stalled-cycles-backend:u # 0.68% backend cycles idle (75.89%) + 1,190,444,919 instructions:u # 1.60 insn per cycle + # 0.00 stalled cycles per insn (76.57%) + 0.365656414 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.232893e-01 -Avg ME (F77/GPU) = 0.42328959883889183 -Relative difference = 7.059920764700599e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 4.232895e-01 +Avg ME (F77/GPU) = 0.42328966126660816 +Relative difference = 3.80984192091939e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.685625e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.126627e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.126627e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.173857 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.304592e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.495176e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.495176e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 +TOTAL : 0.955824 sec INFO: No Floating Point Exceptions have been reported - 3,371,653,633 cycles # 2.862 GHz - 8,663,374,999 instructions # 2.57 insn per cycle - 1.179087797 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 464) (avx2: 0) (512y: 0) (512z: 0) + 3,142,646,888 cycles:u # 3.256 GHz (75.02%) + 6,879,009 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.16%) + 7,453,801 stalled-cycles-backend:u # 0.24% backend cycles idle (75.14%) + 8,494,020,775 instructions:u # 2.70 insn per cycle + # 0.00 stalled cycles per insn (75.14%) + 0.969023811 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 516) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.242831e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.476100e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.476100e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.559869 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.382427e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.124207e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.124207e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 +TOTAL : 0.463796 sec INFO: No Floating Point Exceptions have been reported - 1,544,628,517 cycles # 2.742 GHz - 3,687,558,281 instructions # 2.39 insn per cycle - 0.565253973 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1472) (avx2: 0) (512y: 0) (512z: 0) + 1,400,689,385 cycles:u # 2.954 GHz (74.73%) + 7,018,882 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.70%) + 9,913,813 stalled-cycles-backend:u # 0.71% backend cycles idle (74.70%) + 3,706,220,717 instructions:u # 2.65 insn per cycle + # 0.00 stalled cycles per insn (74.75%) + 0.492260546 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1393) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328960439772345 -Relative difference = 1.0389396439618597e-08 +Avg ME (F77/C++) = 0.42328960620216094 +Relative difference = 1.4652287586288606e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.072720e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.536969e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.536969e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.431765 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.689283e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.323261e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.323261e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 +TOTAL : 0.376265 sec INFO: No Floating Point Exceptions have been reported - 1,203,780,059 cycles # 2.758 GHz - 2,425,738,448 instructions # 2.02 insn per cycle - 0.436956710 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1835) (512y: 0) (512z: 0) + 1,095,520,511 cycles:u # 2.839 GHz (75.12%) + 6,398,970 stalled-cycles-frontend:u # 0.58% frontend cycles idle (75.13%) + 22,193,733 stalled-cycles-backend:u # 2.03% backend cycles idle (75.13%) + 2,338,728,203 instructions:u # 2.13 insn per cycle + # 0.01 stalled cycles per insn (75.13%) + 0.389199301 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1807) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328956670826301 -Relative difference = 7.865002347873079e-08 +Avg ME (F77/C++) = 0.42328956839628518 +Relative difference = 7.466215756732981e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.171115e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.846212e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.846212e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.420853 sec -INFO: No Floating Point Exceptions have been reported - 1,176,016,394 cycles # 2.764 GHz - 2,371,904,468 instructions # 2.02 insn per cycle - 0.426173333 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1716) (512y: 2) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328956670826301 -Relative difference = 7.865002347873079e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.877260e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.908000e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.908000e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.456855 sec -INFO: No Floating Point Exceptions have been reported 
- 1,057,659,631 cycles # 2.291 GHz - 2,045,594,279 instructions # 1.93 insn per cycle - 0.462305299 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1125) (512y: 5) (512z: 1216) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328957567224279 -Relative difference = 5.7473080363015266e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 7aff49b16c..4f2f50212a 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:18:56 +DATE: 2024-05-16_16:46:58 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.371360e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.210950e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.256375e+09 ) sec^-1 -MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 -TOTAL : 0.480672 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.276474e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.320229e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.723645e+09 ) 
sec^-1 +MeanMatrixElemValue = ( 4.242352e-01 +- 1.228752e-04 ) GeV^0 +TOTAL : 0.321429 sec INFO: No Floating Point Exceptions have been reported - 1,992,055,315 cycles # 2.814 GHz - 2,833,598,547 instructions # 1.42 insn per cycle - 0.764848194 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 71 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 765,125,856 cycles:u # 2.238 GHz (74.84%) + 2,127,653 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.03%) + 4,024,928 stalled-cycles-backend:u # 0.53% backend cycles idle (75.09%) + 1,274,853,767 instructions:u # 1.67 insn per cycle + # 0.00 stalled cycles per insn (77.21%) + 0.373458468 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.232893e-01 -Avg ME (F77/GPU) = 0.42328960436861962 -Relative difference = 7.190557844040413e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 4.232895e-01 +Avg ME (F77/GPU) = 0.42328966126660816 +Relative difference = 3.80984192091939e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.763702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.137508e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.137508e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.163446 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.328949e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.527922e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.527922e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 +TOTAL : 0.941970 sec INFO: No Floating Point Exceptions have been reported - 3,338,476,373 cycles # 2.858 GHz - 8,537,550,948 instructions # 2.56 insn per cycle - 1.168736395 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 372) (avx2: 0) (512y: 0) (512z: 0) + 3,079,281,165 cycles:u # 3.236 GHz (74.61%) + 6,178,915 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.78%) + 9,066,673 stalled-cycles-backend:u # 0.29% backend cycles idle (74.81%) + 8,547,816,293 instructions:u # 2.78 insn per cycle + # 0.00 stalled cycles per insn (74.81%) + 0.955595287 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 379) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.260122e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.497908e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497908e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.555036 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.385350e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.149017e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.149017e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 +TOTAL : 0.463541 sec INFO: No Floating Point Exceptions have been reported - 1,536,047,057 cycles # 2.745 GHz - 3,655,155,421 instructions # 2.38 insn per cycle - 0.560267212 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1417) (avx2: 0) (512y: 0) (512z: 0) + 1,397,571,134 cycles:u # 2.954 GHz (74.65%) + 7,037,611 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.64%) + 11,680,404 stalled-cycles-backend:u # 0.84% backend cycles idle (74.70%) + 3,670,865,817 instructions:u # 2.63 insn per cycle + # 0.00 stalled cycles per insn (74.70%) + 0.476825915 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1365) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328960439772345 -Relative difference = 1.0389396439618597e-08 +Avg ME (F77/C++) = 0.42328960620216094 +Relative difference = 1.4652287586288606e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.063874e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.501699e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.501699e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.432903 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.695540e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.339130e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.339130e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 +TOTAL : 0.376823 sec INFO: No Floating Point Exceptions have been reported - 1,210,141,290 cycles # 2.765 GHz - 2,409,755,736 instructions # 1.99 insn per cycle - 0.438252635 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1739) (512y: 0) (512z: 0) + 1,079,351,160 cycles:u # 2.789 GHz (75.20%) + 6,706,529 stalled-cycles-frontend:u # 0.62% frontend cycles idle (75.20%) + 8,357,372 stalled-cycles-backend:u # 0.77% backend cycles idle (75.20%) + 2,355,555,888 instructions:u # 2.18 insn per cycle + # 0.00 stalled cycles per insn (75.19%) + 0.390366323 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1722) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328956670826301 -Relative difference = 7.865002347873079e-08 +Avg ME (F77/C++) = 0.42328956839628518 +Relative difference = 7.466215756732981e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.166764e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.861571e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.861571e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.420903 sec -INFO: No Floating Point Exceptions have been reported - 1,178,969,939 cycles # 2.770 GHz - 2,360,225,770 instructions # 2.00 insn per cycle - 0.426183474 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1639) (512y: 2) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328956670826301 -Relative difference = 7.865002347873079e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.911284e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.009343e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.009343e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.450059 sec -INFO: No Floating Point Exceptions have been reported 
- 1,050,992,336 cycles # 2.312 GHz - 2,030,439,704 instructions # 1.93 insn per cycle - 0.455402836 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1038) (512y: 5) (512z: 1206) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328957567224279 -Relative difference = 5.7473080363015266e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index abe970d6c3..c7066c8a74 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:19:08 +DATE: 2024-05-16_16:47:05 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.820532e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.774843e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.362520e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.522135 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.306885e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.364036e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.798270e+08 ) 
sec^-1 +MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 +TOTAL : 0.363559 sec INFO: No Floating Point Exceptions have been reported - 2,125,526,304 cycles # 2.816 GHz - 3,031,609,259 instructions # 1.43 insn per cycle - 0.813775431 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 762,883,474 cycles:u # 2.061 GHz (73.90%) + 2,138,647 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.39%) + 5,273,566 stalled-cycles-backend:u # 0.69% backend cycles idle (76.10%) + 1,266,871,946 instructions:u # 1.66 insn per cycle + # 0.00 stalled cycles per insn (76.25%) + 0.418126761 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961420809225 -Relative difference = 2.02678940084305e-07 +Avg ME (F77/GPU) = 0.42328961420809230 +Relative difference = 2.026789399531628e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.477506e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.093135e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.093135e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.221347 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.165373e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.317942e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.317942e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 1.093202 sec INFO: No Floating Point Exceptions have been reported - 3,505,104,547 cycles # 2.859 GHz - 8,781,502,817 instructions # 2.51 insn per cycle - 1.226777715 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) + 3,517,886,438 cycles:u # 3.171 GHz (74.79%) + 8,156,251 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.78%) + 14,175,606 stalled-cycles-backend:u # 0.40% backend cycles idle (74.77%) + 8,647,070,613 instructions:u # 2.46 insn per cycle + # 0.00 stalled cycles per insn (74.94%) + 1.113015607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 427) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.650256e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.201424e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.201424e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.750812 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.201883e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.882372e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.882372e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.669302 sec INFO: No Floating Point Exceptions have been reported - 2,158,593,065 cycles # 2.858 GHz - 5,461,970,761 instructions # 2.53 insn per cycle - 0.756427517 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1315) (avx2: 0) (512y: 0) (512z: 0) + 2,048,006,224 cycles:u # 2.992 GHz (74.29%) + 8,758,196 stalled-cycles-frontend:u # 0.43% frontend cycles idle (74.77%) + 12,292,199 stalled-cycles-backend:u # 0.60% backend cycles idle (75.30%) + 5,263,631,425 instructions:u # 2.57 insn per cycle + # 0.00 stalled cycles per insn (75.46%) + 0.687604394 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1260) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.173052e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.222124e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.222124e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.600946 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.587206e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.385221e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.385221e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.484670 sec INFO: No Floating Point Exceptions have been reported - 1,584,857,703 cycles # 2.630 GHz - 3,130,453,718 instructions # 1.98 insn per cycle - 0.606559761 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1508) (512y: 0) (512z: 0) + 1,381,448,051 cycles:u # 2.761 GHz (74.62%) + 8,400,102 stalled-cycles-frontend:u # 0.61% frontend cycles idle (74.48%) + 13,152,020 stalled-cycles-backend:u # 0.95% backend cycles idle (74.42%) + 3,058,571,921 instructions:u # 2.21 insn per cycle + # 0.00 stalled cycles per insn (74.43%) + 0.503638227 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328962604218012 -Relative difference = 1.747215201983364e-07 +Avg ME (F77/C++) = 0.42328962559055894 +Relative difference = 1.757884518645067e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.444228e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.788523e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.788523e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.544040 sec -INFO: No Floating Point Exceptions have been reported - 1,507,653,377 cycles # 2.746 GHz - 2,979,978,086 instructions # 1.98 insn per cycle - 0.549733637 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1266) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328962604218012 -Relative difference = 1.747215201983364e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.159766e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.131056e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.131056e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.601738 sec -INFO: No Floating Point Exceptions have been reported 
- 1,324,343,740 cycles # 2.183 GHz - 2,317,585,809 instructions # 1.75 insn per cycle - 0.607328338 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 708) (512y: 64) (512z: 1000) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328962604218012 -Relative difference = 1.747215201983364e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index 91c7a883f0..a386adac7f 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_15:19:20 +DATE: 2024-05-16_16:47:12 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.922874e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.310136e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.745093e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.518290 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.003235e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.979938e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.601795e+08 ) 
sec^-1 +MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 +TOTAL : 0.363255 sec INFO: No Floating Point Exceptions have been reported - 2,124,893,311 cycles # 2.820 GHz - 3,045,592,907 instructions # 1.43 insn per cycle - 0.810370808 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 792,900,612 cycles:u # 2.144 GHz (75.31%) + 2,166,032 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.87%) + 5,227,505 stalled-cycles-backend:u # 0.66% backend cycles idle (75.33%) + 1,376,791,949 instructions:u # 1.74 insn per cycle + # 0.00 stalled cycles per insn (74.08%) + 0.420343005 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961420809225 -Relative difference = 2.02678940084305e-07 +Avg ME (F77/GPU) = 0.42328961420809230 +Relative difference = 2.026789399531628e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.542081e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.100861e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.100861e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 1.212162 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.175622e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.330964e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.330964e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 1.084986 sec INFO: No Floating Point Exceptions have been reported - 3,479,876,909 cycles # 2.860 GHz - 8,693,142,752 instructions # 2.50 insn per cycle - 1.217788949 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) + 3,500,822,655 cycles:u # 3.182 GHz (74.57%) + 8,555,826 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.78%) + 14,017,103 stalled-cycles-backend:u # 0.40% backend cycles idle (75.13%) + 8,519,832,574 instructions:u # 2.43 insn per cycle + # 0.00 stalled cycles per insn (75.28%) + 1.104173538 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 358) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.583309e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.076893e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.076893e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.776846 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.212101e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.890856e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.890856e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.666977 sec INFO: No Floating Point Exceptions have been reported - 2,167,338,088 cycles # 2.773 GHz - 5,396,551,029 instructions # 2.49 insn per cycle - 0.782321373 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1286) (avx2: 0) (512y: 0) (512z: 0) + 2,034,100,023 cycles:u # 2.982 GHz (74.31%) + 8,738,451 stalled-cycles-frontend:u # 0.43% frontend cycles idle (74.88%) + 17,580,783 stalled-cycles-backend:u # 0.86% backend cycles idle (75.37%) + 5,244,786,981 instructions:u # 2.58 insn per cycle + # 0.00 stalled cycles per insn (75.37%) + 0.685345328 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.326845e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.550286e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.550286e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.565802 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 3.561486e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.346476e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.346476e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 +TOTAL : 0.488701 sec INFO: No Floating Point Exceptions have been reported - 1,565,712,129 cycles # 2.743 GHz - 3,096,211,416 instructions # 1.98 insn per cycle - 0.571442008 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1403) (512y: 0) (512z: 0) + 1,403,826,615 cycles:u # 2.784 GHz (73.88%) + 8,976,844 stalled-cycles-frontend:u # 0.64% frontend cycles idle (73.88%) + 16,525,914 stalled-cycles-backend:u # 1.18% backend cycles idle (74.62%) + 3,022,589,948 instructions:u # 2.15 insn per cycle + # 0.01 stalled cycles per insn (75.27%) + 0.507523094 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1430) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328962604218012 -Relative difference = 1.747215201983364e-07 +Avg ME (F77/C++) = 0.42328962559055894 +Relative difference = 1.757884518645067e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.453432e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.812851e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.812851e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.541762 sec -INFO: No Floating Point Exceptions have been reported - 1,501,240,710 cycles # 2.746 GHz - 2,962,583,104 instructions # 1.97 insn per cycle - 0.547343450 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1207) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328962604218012 -Relative difference = 1.747215201983364e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.179755e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.168512e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.168512e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 -TOTAL : 0.595795 sec -INFO: No Floating Point Exceptions have been reported 
- 1,328,066,698 cycles # 2.210 GHz - 2,301,968,914 instructions # 1.73 insn per cycle - 0.601517736 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 669) (512y: 64) (512z: 987) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328962604218012 -Relative difference = 1.747215201983364e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 685cbca5b9..6d05f96261 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building 
in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:16:05 +DATE: 2024-05-16_16:45:15 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.742150e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168430e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.277843e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.532609 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.879928e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.958359e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.012731e+07 ) sec^-1 
+MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 +TOTAL : 0.382968 sec INFO: No Floating Point Exceptions have been reported - 2,187,320,510 cycles # 2.847 GHz - 3,138,661,758 instructions # 1.43 insn per cycle - 0.825533767 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 852,165,853 cycles:u # 2.135 GHz (75.88%) + 2,112,629 stalled-cycles-frontend:u # 0.25% frontend cycles idle (76.16%) + 4,490,362 stalled-cycles-backend:u # 0.53% backend cycles idle (75.01%) + 1,418,720,820 instructions:u # 1.66 insn per cycle + # 0.00 stalled cycles per insn (74.20%) + 0.447542557 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795799595186 -Relative difference = 1.2987943449389332e-07 +Avg ME (F77/GPU) = 3.2340795799595181 +Relative difference = 1.298794346312088e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.052254e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.112326e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.112326e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.217611 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.541705e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.606940e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.606940e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 4.306705 sec INFO: No Floating Point Exceptions have been reported - 15,171,088,318 cycles # 2.905 GHz - 38,379,828,637 instructions # 2.53 insn per cycle - 5.223033411 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) + 14,746,431,578 cycles:u # 3.414 GHz (75.00%) + 9,550,798 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) + 1,128,989,684 stalled-cycles-backend:u # 7.66% backend cycles idle (75.00%) + 38,772,621,071 instructions:u # 2.63 insn per cycle + # 0.03 stalled cycles per insn (74.91%) + 4.328692731 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 726) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593964 Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.483453e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.675957e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.675957e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 3.119586 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.347338e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.559630e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.559630e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 2.592991 sec INFO: No Floating Point Exceptions have been reported - 9,050,575,942 cycles # 2.897 GHz - 24,585,418,505 instructions # 2.72 insn per cycle - 3.125051862 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) + 8,703,537,684 cycles:u # 3.341 GHz (74.89%) + 9,976,823 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.05%) + 853,729,768 stalled-cycles-backend:u # 9.81% backend cycles idle (75.13%) + 24,324,668,051 instructions:u # 2.79 insn per cycle + # 0.04 stalled cycles per insn (75.13%) + 2.608179496 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799593955 -Relative difference = 1.2987947253027805e-07 +Avg ME (F77/C++) = 3.2340795799593964 +Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.531605e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.007383e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 6.007383e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.004395 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.569514e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.185817e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.185817e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.575070 sec INFO: No Floating Point Exceptions have been reported - 5,470,487,475 cycles # 2.723 GHz - 11,258,117,341 instructions # 2.06 insn per cycle - 2.009874159 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2379) (512y: 0) (512z: 0) + 5,122,078,109 cycles:u # 3.228 GHz (74.83%) + 8,453,850 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.81%) + 68,227,274 stalled-cycles-backend:u # 1.33% backend cycles idle (74.81%) + 11,519,205,830 instructions:u # 2.25 insn per cycle + # 0.01 stalled cycles per insn (74.89%) + 1.590376059 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2399) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799594546 -Relative difference = 1.2987945426732077e-07 +Avg ME (F77/C++) = 3.2340795799594542 +Relative difference = 1.2987945440463624e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.034312e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.611178e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.611178e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 1.846817 sec -INFO: No Floating Point Exceptions have been reported - 4,937,000,755 cycles # 2.666 GHz - 10,562,656,233 instructions # 2.14 insn per cycle - 1.852346867 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799594546 -Relative difference = 1.2987945426732077e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.686069e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.892849e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.892849e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.955560 sec -INFO: No Floating Point Exceptions have been reported - 5,363,967,162 cycles # 1.812 GHz - 
7,798,816,647 instructions # 1.45 insn per cycle - 2.961128813 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1545) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799594546 -Relative difference = 1.2987945426732077e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index e33bd01ef0..628ec0434d 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:16:29 +DATE: 2024-05-16_16:45:28 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.734270e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.167895e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.277771e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.531030 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.841194e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.924897e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.978336e+07 ) sec^-1 
+MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 +TOTAL : 0.395777 sec INFO: No Floating Point Exceptions have been reported - 2,147,766,041 cycles # 2.808 GHz - 3,081,960,346 instructions # 1.43 insn per cycle - 0.823573588 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 859,912,650 cycles:u # 2.064 GHz (75.28%) + 2,183,975 stalled-cycles-frontend:u # 0.25% frontend cycles idle (76.01%) + 5,020,739 stalled-cycles-backend:u # 0.58% backend cycles idle (75.61%) + 1,382,370,967 instructions:u # 1.61 insn per cycle + # 0.00 stalled cycles per insn (74.66%) + 0.450533831 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795799595186 -Relative difference = 1.2987943449389332e-07 +Avg ME (F77/GPU) = 3.2340795799595181 +Relative difference = 1.298794346312088e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.072347e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.133952e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.133952e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.167480 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.422423e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.483471e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.483471e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 4.510005 sec INFO: No Floating Point Exceptions have been reported - 15,011,121,904 cycles # 2.902 GHz - 40,101,107,795 instructions # 2.67 insn per cycle - 5.172969591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 15,386,177,336 cycles:u # 3.403 GHz (74.91%) + 9,207,550 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) + 18,497,567 stalled-cycles-backend:u # 0.12% backend cycles idle (75.02%) + 39,476,222,640 instructions:u # 2.57 insn per cycle + # 0.00 stalled cycles per insn (75.06%) + 4.525913205 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 597) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799593964 -Relative difference = 1.2987947225564713e-07 +Avg ME (F77/C++) = 3.2340795799593969 +Relative difference = 1.2987947211833165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.643871e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.853935e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 3.853935e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.986462 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.382705e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.599240e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.599240e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 2.572662 sec INFO: No Floating Point Exceptions have been reported - 8,687,902,361 cycles # 2.905 GHz - 23,671,582,038 instructions # 2.72 insn per cycle - 2.991891761 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2071) (avx2: 0) (512y: 0) (512z: 0) + 8,623,908,400 cycles:u # 3.337 GHz (74.97%) + 9,173,907 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) + 1,146,019,295 stalled-cycles-backend:u # 13.29% backend cycles idle (74.93%) + 23,610,189,818 instructions:u # 2.74 insn per cycle + # 0.05 stalled cycles per insn (74.94%) + 2.587492591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1947) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799593955 -Relative difference = 1.2987947253027805e-07 +Avg ME (F77/C++) = 3.2340795799593964 +Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.688647e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.031946e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 5.031946e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.347118 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.943197e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.423167e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.423167e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.694513 sec INFO: No Floating Point Exceptions have been reported - 6,408,205,490 cycles # 2.726 GHz - 13,061,009,362 instructions # 2.04 insn per cycle - 2.352705794 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2545) (512y: 0) (512z: 0) + 5,665,749,753 cycles:u # 3.318 GHz (74.76%) + 8,841,103 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.75%) + 614,722,978 stalled-cycles-backend:u # 10.85% backend cycles idle (74.98%) + 13,076,993,512 instructions:u # 2.31 insn per cycle + # 0.05 stalled cycles per insn (75.17%) + 1.710627514 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2559) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799594546 -Relative difference = 1.2987945426732077e-07 +Avg ME (F77/C++) = 3.2340795799594542 +Relative difference = 1.2987945440463624e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.217515e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.639971e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.639971e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.116902 sec -INFO: No Floating Point Exceptions have been reported - 5,786,103,959 cycles # 2.728 GHz - 12,322,398,791 instructions # 2.13 insn per cycle - 2.122365893 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 2092) (512y: 294) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799594546 -Relative difference = 1.2987945426732077e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.391355e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.565589e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.565589e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 3.201193 sec -INFO: No Floating Point Exceptions have been reported - 5,819,258,849 cycles # 1.816 GHz - 
9,603,315,511 instructions # 1.65 insn per cycle - 3.206783116 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1970) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799594546 -Relative difference = 1.2987945426732077e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index fa2404eda0..12dcdcf4bd 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:16:53 +DATE: 2024-05-16_16:45:42 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.806467e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.679043e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.988694e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294909e+00 +- 3.228140e-03 ) GeV^0 -TOTAL : 0.484472 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.269455e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.916466e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.075106e+08 ) sec^-1 
+MeanMatrixElemValue = ( 3.286435e+00 +- 3.209475e-03 ) GeV^0 +TOTAL : 0.322081 sec INFO: No Floating Point Exceptions have been reported - 2,024,107,607 cycles # 2.847 GHz - 2,925,717,340 instructions # 1.45 insn per cycle - 0.767822860 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 748,467,775 cycles:u # 2.155 GHz (75.25%) + 2,088,025 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.11%) + 4,948,506 stalled-cycles-backend:u # 0.66% backend cycles idle (75.08%) + 1,247,489,937 instructions:u # 1.67 insn per cycle + # 0.00 stalled cycles per insn (75.83%) + 0.385008032 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 3.234085e+00 -Avg ME (F77/GPU) = 3.2341253389604390 -Relative difference = 1.2473067479392238e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 3.234089e+00 +Avg ME (F77/GPU) = 3.2340912986546755 +Relative difference = 7.107580142328097e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.190102e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.263149e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.263149e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 -TOTAL : 4.875075 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.964503e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.052988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.052988e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 +TOTAL : 3.687002 sec INFO: No Floating Point Exceptions have been reported - 14,157,231,167 cycles # 2.902 GHz - 38,349,372,496 instructions # 2.71 insn per cycle - 4.880360280 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 596) (avx2: 0) (512y: 0) (512z: 0) + 12,604,001,470 cycles:u # 3.411 GHz (74.94%) + 7,571,589 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.80%) + 798,909,126 stalled-cycles-backend:u # 6.34% backend cycles idle (74.85%) + 37,090,557,686 instructions:u # 2.94 insn per cycle + # 0.02 stalled cycles per insn (75.06%) + 3.699024002 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 607) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 -Avg ME (F77/C++) = 3.2340941932052374 -Relative difference = 5.974014286114415e-08 +Avg ME (F77/C++) = 3.2340939850546420 +Relative difference = 4.621188450363643e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.893708e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.295163e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
5.295163e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 -TOTAL : 2.231375 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.216562e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.634342e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.634342e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287818e+00 +- 3.220111e-03 ) GeV^0 +TOTAL : 1.833635 sec INFO: No Floating Point Exceptions have been reported - 6,474,839,888 cycles # 2.896 GHz - 15,821,273,128 instructions # 2.44 insn per cycle - 2.236825857 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2693) (avx2: 0) (512y: 0) (512z: 0) + 6,104,178,774 cycles:u # 3.313 GHz (74.71%) + 6,853,294 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.84%) + 2,101,538,414 stalled-cycles-backend:u # 34.43% backend cycles idle (74.84%) + 15,190,745,209 instructions:u # 2.49 insn per cycle + # 0.14 stalled cycles per insn (75.00%) + 1.858522382 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2462) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234093e+00 -Avg ME (F77/C++) = 3.2340934062376618 -Relative difference = 1.2561100182708985e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234094e+00 +Avg ME (F77/C++) = 3.2340941177681088 +Relative difference = 3.641455970126884e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.952001e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.027533e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.027533e+06 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.258720 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 1.215158e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.372742e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.372742e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.287796e+00 +- 3.219543e-03 ) GeV^0 +TOTAL : 1.009427 sec INFO: No Floating Point Exceptions have been reported - 3,454,982,692 cycles # 2.735 GHz - 7,599,041,128 instructions # 2.20 insn per cycle - 1.263980564 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3054) (512y: 0) (512z: 0) + 3,315,467,966 cycles:u # 3.257 GHz (74.85%) + 7,383,317 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.85%) + 1,098,772,693 stalled-cycles-backend:u # 33.14% backend cycles idle (74.85%) + 7,732,872,347 instructions:u # 2.33 insn per cycle + # 0.14 stalled cycles per insn (74.88%) + 1.021036802 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3076) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234092e+00 -Avg ME (F77/C++) = 3.2340919882990420 -Relative difference = 3.6180040581126224e-09 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234093e+00 +Avg ME (F77/C++) = 3.2340926420874894 +Relative difference = 1.1066858953654753e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.592851e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.112843e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.112843e+06 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.180051 sec -INFO: No Floating Point Exceptions have been reported - 3,244,154,820 cycles # 2.739 GHz - 7,208,080,032 instructions # 2.22 insn per cycle - 1.185371954 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2854) (512y: 23) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234092e+00 -Avg ME (F77/C++) = 3.2340919882990420 -Relative difference = 3.6180040581126224e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.861599e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.601056e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.601056e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.616099 sec -INFO: No Floating Point Exceptions have been reported - 
3,061,871,050 cycles # 1.890 GHz - 5,840,738,200 instructions # 1.91 insn per cycle - 1.621459577 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2375) (512y: 24) (512z: 1889) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234092e+00 -Avg ME (F77/C++) = 3.2340921289287508 -Relative difference = 3.986551736519174e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 17580b0829..abda009b1a 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:17:12 +DATE: 2024-05-16_16:45:54 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.907160e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.728602e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.048441e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294909e+00 +- 3.228140e-03 ) GeV^0 -TOTAL : 0.485743 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.823498e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.096158e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.277282e+08 ) sec^-1 
+MeanMatrixElemValue = ( 3.286435e+00 +- 3.209475e-03 ) GeV^0 +TOTAL : 0.322919 sec INFO: No Floating Point Exceptions have been reported - 2,023,423,533 cycles # 2.849 GHz - 2,905,255,031 instructions # 1.44 insn per cycle - 0.768600730 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 756,999,640 cycles:u # 2.187 GHz (74.49%) + 2,076,248 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.38%) + 5,285,191 stalled-cycles-backend:u # 0.70% backend cycles idle (76.34%) + 1,225,255,188 instructions:u # 1.62 insn per cycle + # 0.00 stalled cycles per insn (76.93%) + 0.375119226 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 3.234085e+00 -Avg ME (F77/GPU) = 3.2341253389604390 -Relative difference = 1.2473067479392238e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 3.234089e+00 +Avg ME (F77/GPU) = 3.2340912986546755 +Relative difference = 7.107580142328097e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.168782e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.238544e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.238544e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 -TOTAL : 4.921731 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.954161e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.044245e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.044245e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 +TOTAL : 3.697492 sec INFO: No Floating Point Exceptions have been reported - 14,314,886,956 cycles # 2.906 GHz - 39,834,092,366 instructions # 2.78 insn per cycle - 4.927032591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) + 12,629,141,062 cycles:u # 3.408 GHz (74.96%) + 7,468,212 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.96%) + 9,965,210 stalled-cycles-backend:u # 0.08% backend cycles idle (74.96%) + 37,498,294,497 instructions:u # 2.97 insn per cycle + # 0.00 stalled cycles per insn (74.97%) + 3.710240742 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 500) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 -Avg ME (F77/C++) = 3.2340941675938666 -Relative difference = 5.182096339328524e-08 +Avg ME (F77/C++) = 3.2340939850546420 +Relative difference = 4.621188450363643e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.713515e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.269520e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.269520e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 -TOTAL : 1.922771 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.011776e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.586789e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.586789e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287818e+00 +- 3.220111e-03 ) GeV^0 +TOTAL : 1.652272 sec INFO: No Floating Point Exceptions have been reported - 5,581,497,918 cycles # 2.896 GHz - 15,286,085,618 instructions # 2.74 insn per cycle - 1.928038449 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) + 5,332,655,349 cycles:u # 3.211 GHz (75.04%) + 6,983,998 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.96%) + 1,050,227,851 stalled-cycles-backend:u # 19.69% backend cycles idle (74.96%) + 15,177,218,540 instructions:u # 2.85 insn per cycle + # 0.07 stalled cycles per insn (74.95%) + 1.664682527 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2329) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234093e+00 -Avg ME (F77/C++) = 3.2340934062376618 -Relative difference = 1.2561100182708985e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234094e+00 +Avg ME (F77/C++) = 3.2340941177681088 +Relative difference = 3.641455970126884e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.348339e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.987488e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.987488e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.738529 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 8.898362e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.711000e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.711000e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287796e+00 +- 3.219543e-03 ) GeV^0 +TOTAL : 1.325334 sec INFO: No Floating Point Exceptions have been reported - 4,748,584,350 cycles # 2.724 GHz - 9,734,762,909 instructions # 2.05 insn per cycle - 1.743720825 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3707) (512y: 0) (512z: 0) + 4,431,855,356 cycles:u # 3.323 GHz (74.81%) + 7,715,458 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.83%) + 1,686,083,115 stalled-cycles-backend:u # 38.04% backend cycles idle (74.83%) + 9,840,672,101 instructions:u # 2.22 insn per cycle + # 0.17 stalled cycles per insn (75.03%) + 1.339574896 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3749) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234092e+00 -Avg ME (F77/C++) = 3.2340919817797840 -Relative difference = 5.633796441974414e-09 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234093e+00 +Avg ME (F77/C++) = 3.2340926462784410 +Relative difference = 1.0937272340475427e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.524514e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.201131e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.201131e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.693263 sec -INFO: No Floating Point Exceptions have been reported - 4,630,030,488 cycles # 2.727 GHz - 9,326,323,775 instructions # 2.01 insn per cycle - 1.698452247 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3495) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234092e+00 -Avg ME (F77/C++) = 3.2340919817797840 -Relative difference = 5.633796441974414e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.566237e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.043529e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.043529e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 -TOTAL : 1.970968 sec -INFO: No Floating Point Exceptions have been reported - 
3,659,262,236 cycles # 1.853 GHz - 7,035,706,161 instructions # 1.92 insn per cycle - 1.976219857 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2608) (512y: 12) (512z: 2220) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234092e+00 -Avg ME (F77/C++) = 3.2340921270661056 -Relative difference = 3.928957668408837e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index b504154b8b..6b2529e9fc 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:17:33 +DATE: 2024-05-16_16:46:06 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.734753e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166290e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.275672e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.527580 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.837198e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.016409e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.072163e+07 ) sec^-1 
+MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 +TOTAL : 0.383476 sec INFO: No Floating Point Exceptions have been reported - 2,184,025,819 cycles # 2.852 GHz - 3,120,664,968 instructions # 1.43 insn per cycle - 0.822365132 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 901,453,430 cycles:u # 2.224 GHz (74.25%) + 2,305,493 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.35%) + 5,372,210 stalled-cycles-backend:u # 0.60% backend cycles idle (74.94%) + 1,511,664,096 instructions:u # 1.68 insn per cycle + # 0.00 stalled cycles per insn (74.27%) + 0.438320391 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795839181666 -Relative difference = 1.2865539301192385e-07 +Avg ME (F77/GPU) = 3.2340795839181671 +Relative difference = 1.2865539287460837e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.032702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.091464e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.091464e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.267767 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.448788e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.510079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.510079e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 4.465296 sec INFO: No Floating Point Exceptions have been reported - 15,275,610,730 cycles # 2.898 GHz - 38,585,204,587 instructions # 2.53 insn per cycle - 5.273127531 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 677) (avx2: 0) (512y: 0) (512z: 0) + 15,159,032,142 cycles:u # 3.385 GHz (75.02%) + 9,318,044 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) + 876,595,791 stalled-cycles-backend:u # 5.78% backend cycles idle (74.99%) + 39,258,289,836 instructions:u # 2.59 insn per cycle + # 0.02 stalled cycles per insn (75.00%) + 4.481904018 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 747) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.478780e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.672331e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.672331e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 3.124457 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.439342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.668048e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.668048e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 2.557997 sec INFO: No Floating Point Exceptions have been reported - 8,951,368,692 cycles # 2.862 GHz - 24,230,346,765 instructions # 2.71 insn per cycle - 3.129932357 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) + 8,608,096,320 cycles:u # 3.348 GHz (74.84%) + 9,186,224 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.85%) + 1,586,729,408 stalled-cycles-backend:u # 18.43% backend cycles idle (75.01%) + 23,998,639,412 instructions:u # 2.79 insn per cycle + # 0.07 stalled cycles per insn (75.11%) + 2.574633110 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2102) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.646169e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.144963e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.144963e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 1.966588 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 7.870031e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.493127e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.493127e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.516500 sec INFO: No Floating Point Exceptions have been reported - 5,394,193,630 cycles # 2.737 GHz - 11,282,079,100 instructions # 2.09 insn per cycle - 1.972075346 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2483) (512y: 0) (512z: 0) + 5,014,619,133 cycles:u # 3.279 GHz (74.89%) + 8,998,353 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.91%) + 531,408,134 stalled-cycles-backend:u # 10.60% backend cycles idle (74.91%) + 11,425,617,471 instructions:u # 2.28 insn per cycle + # 0.05 stalled cycles per insn (74.89%) + 1.532893502 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2470) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340796772295590 -Relative difference = 9.980286234148268e-08 +Avg ME (F77/C++) = 3.2340796844996675 +Relative difference = 9.755489429022839e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.312770e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.933844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.933844e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 1.769300 sec -INFO: No Floating Point Exceptions have been reported - 4,855,634,573 cycles # 2.737 GHz - 10,529,908,188 instructions # 2.17 insn per cycle - 1.774939787 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 2170) (512y: 148) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340796772295590 -Relative difference = 9.980286234148268e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.779051e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.993953e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.993953e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.883837 sec -INFO: No Floating Point Exceptions have been reported - 5,232,692,174 cycles # 1.812 GHz - 
7,609,089,901 instructions # 1.45 insn per cycle - 2.889504238 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1611) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340796772295590 -Relative difference = 9.980286234148268e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index 62b069d661..66608a5a11 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,218 +1,175 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=hip +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:17:56 +DATE: 2024-05-16_16:46:19 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe +========================================================================= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.743856e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168884e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279553e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 0.531580 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.794547e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.927022e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.980434e+07 ) sec^-1 
+MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 +TOTAL : 0.381240 sec INFO: No Floating Point Exceptions have been reported - 2,155,818,187 cycles # 2.818 GHz - 3,085,690,683 instructions # 1.43 insn per cycle - 0.823819066 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 907,470,356 cycles:u # 2.238 GHz (74.21%) + 2,142,588 stalled-cycles-frontend:u # 0.24% frontend cycles idle (73.91%) + 5,471,565 stalled-cycles-backend:u # 0.60% backend cycles idle (74.41%) + 1,430,312,368 instructions:u # 1.58 insn per cycle + # 0.00 stalled cycles per insn (75.08%) + 0.433168931 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795839181666 -Relative difference = 1.2865539301192385e-07 +Avg ME (F77/GPU) = 3.2340795839181671 +Relative difference = 1.2865539287460837e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.002464e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.060011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.060011e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 5.344548 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 2.407035e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.466607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.466607e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 4.538384 sec INFO: No Floating Point Exceptions have been reported - 15,331,700,326 cycles # 2.866 GHz - 40,369,778,421 instructions # 2.63 insn per cycle - 5.350011304 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 15,625,722,006 cycles:u # 3.433 GHz (74.95%) + 9,832,695 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.04%) + 68,139,082 stalled-cycles-backend:u # 0.44% backend cycles idle (75.04%) + 40,075,526,509 instructions:u # 2.56 insn per cycle + # 0.00 stalled cycles per insn (75.04%) + 5.094128203 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 631) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.555017e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.755921e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.755921e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 3.059082 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 4.525563e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.755420e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.755420e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 2.499111 sec INFO: No Floating Point Exceptions have been reported - 8,522,277,742 cycles # 2.782 GHz - 23,253,428,254 instructions # 2.73 insn per cycle - 3.064709896 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2090) (avx2: 0) (512y: 0) (512z: 0) + 8,491,243,161 cycles:u # 3.378 GHz (74.82%) + 10,450,945 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.87%) + 547,161,406 stalled-cycles-backend:u # 6.44% backend cycles idle (74.90%) + 23,487,414,757 instructions:u # 2.77 insn per cycle + # 0.02 stalled cycles per insn (75.06%) + 2.739310858 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1992) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.699594e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.044812e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.044812e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 
-TOTAL : 2.340476 sec +OMP threads / `nproc --all` = 1 / 128 +EvtsPerSec[Rmb+ME] (23) = ( 6.869246e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.340004e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.340004e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 +TOTAL : 1.709634 sec INFO: No Floating Point Exceptions have been reported - 6,239,696,903 cycles # 2.661 GHz - 12,963,096,678 instructions # 2.08 insn per cycle - 2.346005075 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2668) (512y: 0) (512z: 0) + 5,683,454,886 cycles:u # 3.299 GHz (74.96%) + 10,084,749 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.93%) + 600,467,137 stalled-cycles-backend:u # 10.57% backend cycles idle (74.94%) + 13,037,365,443 instructions:u # 2.29 insn per cycle + # 0.05 stalled cycles per insn (74.94%) + 1.726283384 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2710) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340796772295590 -Relative difference = 9.980286234148268e-08 +Avg ME (F77/C++) = 3.2340796844996675 +Relative difference = 9.755489429022839e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.032659e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.430530e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.430530e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 2.191599 sec -INFO: No Floating Point Exceptions have been reported - 5,901,015,524 cycles # 2.687 GHz - 12,238,387,260 instructions # 2.07 insn per cycle - 2.197121947 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 2208) (512y: 296) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340796772295590 -Relative difference = 9.980286234148268e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.554826e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.745267e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.745267e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 -TOTAL : 3.058656 sec -INFO: No Floating Point Exceptions have been reported - 5,596,491,041 cycles # 1.827 GHz - 
8,743,545,379 instructions # 1.56 insn per cycle - 3.064278596 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1908) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -[ PASSED ] 3 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340796772295590 -Relative difference = 9.980286234148268e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED From be946ae925130dc214c553dca43d0951a932078e Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 17 May 2024 08:45:56 +0200 Subject: [PATCH 45/48] [jtmk2] rerun 30 tmad tests on LUMI - all as expected ./tmad/allTees.sh -short -nobsm |& tee pippotmads STARTED AT Thu 16 May 2024 08:42:52 PM EEST (SM tests) ENDED(1) AT Thu 16 May 2024 08:57:09 PM EEST [Status=0] (BSM tests) ENDED(1) AT Thu 16 May 2024 08:57:09 PM EEST [Status=0] ./tmad/allTees.sh -short -bsmonly |& tee pippotmadb STARTED AT Thu 16 May 2024 09:00:59 PM EEST (SM tests) ENDED(1) AT Thu 16 May 2024 09:00:59 PM EEST [Status=0] (BSM tests) ENDED(1) AT Thu 16 May 2024 09:08:27 PM EEST [Status=0] ./tmad/allTees.sh -ggttggg |& tee pippotmadg STARTED AT Thu 16 May 2024 08:42:07 PM EEST (SM tests) ENDED(1) AT Thu 16 May 2024 11:17:00 PM EEST 
[Status=0] (BSM tests) ENDED(1) AT Thu 16 May 2024 11:17:00 PM EEST [Status=0] 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 12 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 12 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 12 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt 16 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt 1 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt 0 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt 0 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt 0 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt 0 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt 0 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt 0 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt --- .../log_eemumu_mad_d_inl0_hrd0.txt | 434 ++++++---------- .../log_eemumu_mad_f_inl0_hrd0.txt | 442 ++++++---------- .../log_eemumu_mad_m_inl0_hrd0.txt | 424 +++++----------- .../log_ggtt_mad_d_inl0_hrd0.txt | 434 ++++++---------- .../log_ggtt_mad_f_inl0_hrd0.txt | 436 ++++++---------- .../log_ggtt_mad_m_inl0_hrd0.txt | 426 ++++++---------- .../log_ggttg_mad_d_inl0_hrd0.txt | 446 ++++++----------- .../log_ggttg_mad_f_inl0_hrd0.txt | 448 ++++++----------- .../log_ggttg_mad_m_inl0_hrd0.txt | 448 ++++++----------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 448 ++++++----------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 450 ++++++----------- 
.../log_ggttgg_mad_m_inl0_hrd0.txt | 450 ++++++----------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 438 ++++++---------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 444 ++++++---------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 438 ++++++---------- .../log_gqttq_mad_d_inl0_hrd0.txt | 470 +++++------------ .../log_gqttq_mad_f_inl0_hrd0.txt | 472 +++++------------- .../log_gqttq_mad_m_inl0_hrd0.txt | 471 +++++------------ .../log_heftggbb_mad_d_inl0_hrd0.txt | 438 ++++++---------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 114 ++--- .../log_heftggbb_mad_m_inl0_hrd0.txt | 446 ++++++----------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 442 ++++++---------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 438 ++++++---------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 434 ++++++---------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 76 +-- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 76 +-- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 74 +-- .../log_susyggtt_mad_d_inl0_hrd0.txt | 86 ++-- .../log_susyggtt_mad_f_inl0_hrd0.txt | 86 ++-- .../log_susyggtt_mad_m_inl0_hrd0.txt | 88 ++-- 30 files changed, 3597 insertions(+), 7220 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 41d66d8253..1fe71e4d65 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_01:25:56 +DATE: 2024-05-16_20:43:11 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7231s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7147s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5477s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5417s + [COUNTERS] Fortran MEs ( 1 ) : 0.0060s for 8192 events => throughput is 1.37E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1771s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1346s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1287s + [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3024s - [COUNTERS] Fortran MEs ( 1 ) : 0.0925s for 90112 events => throughput is 9.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2774s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2147s + [COUNTERS] Fortran MEs ( 1 ) : 0.0627s for 90112 events => throughput is 1.44E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1913s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1844s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 8192 events => throughput is 1.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1522s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.36E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661545E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515602020000780E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3835s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0754s for 90112 events => throughput is 1.20E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2876s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2218s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0658s for 90112 events => throughput is 1.37E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000780E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.192081e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.404212e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.204613e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.361935e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1864s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1822s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1464s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715404661518E-002) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,8 +233,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3602s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3125s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0476s for 90112 events => throughput is 1.89E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2586s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2197s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0390s for 90112 events => throughput is 2.31E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +255,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.953763e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.394617e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.009226e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.428149e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,8 +276,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,13 +285,13 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1833s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1339s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1314s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.28E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,114 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3431s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3065s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0366s for 90112 events => throughput is 2.46E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2444s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2169s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0275s for 90112 events => throughput is 3.28E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.541984e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.417944e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.616899e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1841s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1810s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.69E+06 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.533378e+06 ) sec^-1 -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3388s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3042s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0346s for 90112 events => throughput is 2.61E+06 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.662866e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.883371e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -428,104 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.99E+06 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3474s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3044s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0430s for 90112 events => throughput is 2.10E+06 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.029340e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.231218e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6140s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6135s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.4581s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.75E+07 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715404661532E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.3382715404661518E-002) and hip (9.3382715404661545E-002) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7363s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7314s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.86E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5024s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4979s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 90112 events => throughput is 2.00E+07 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! 
xsec from fortran (9.1515602020000766E-002) and hip (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.277665e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.155916e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.916168e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.548424e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.959957e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.205050e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.493136e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.861671e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.970202e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.220481e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.040191e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.950417e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.002261e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.191300e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.140061e+08 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.568594e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index c4c8099bbf..0ed9fa9829 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_01:26:13 +DATE: 2024-05-16_20:43:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7287s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7200s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5129s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5070s + [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1364s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1305s + [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3902s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2987s - [COUNTERS] Fortran MEs ( 1 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2788s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2163s + [COUNTERS] Fortran MEs ( 1 ) : 0.0625s for 90112 events => throughput is 1.44E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382701684199335E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1903s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1836s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1416s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1364s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 8192 events => throughput is 1.58E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382703205998396E-002) differ by less than 4E-4 (1.306308462512007e-07) +OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382701684199335E-002) differ by less than 4E-4 (1.4692721372888684e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515588842633111E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3825s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3093s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0732s for 90112 events => throughput is 1.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0571s for 90112 events => throughput is 1.58E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515590123565249E-002) differ by less than 4E-4 (1.2999352305698153e-07) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515588842633111E-002) differ by less than 4E-4 (1.439903947186849e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.260929e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.646018e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.250210e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.653973e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382719831741665E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1813s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.15E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1319s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1298s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.87E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700723828302E-002) differ by less than 4E-4 (1.5721146218172777e-07) +OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382719831741665E-002) differ by less than 4E-4 (4.740791825774693e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515606481761602E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3288s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2997s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0290s for 90112 events => throughput is 3.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2386s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2154s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0232s for 90112 events => throughput is 3.88E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587612890761E-002) differ by less than 4E-4 (1.5742791048545257e-07) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515606481761602E-002) differ by less than 4E-4 (4.875410031246474e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.206836e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.034071e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.334282e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.150765e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382719700521907E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1835s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1811s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1324s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1307s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.60E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700679354239E-002) differ by less than 4E-4 (1.576877179942926e-07) +OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382719700521907E-002) differ by less than 4E-4 (4.6002735842876064e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515606480805645E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3304s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3034s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 90112 events => throughput is 3.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2352s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2156s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 90112 events => throughput is 4.60E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002) differ by less than 4E-4 (1.573566908996682e-07) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515606480805645E-002) differ by less than 4E-4 (4.874365444607065e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.522447e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.386931e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1829s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.41E+06 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700679354239E-002) differ by less than 4E-4 (1.576877179942926e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3301s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3033s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 90112 events => throughput is 3.37E+06 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002) differ by less than 4E-4 (1.573566908996682e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.528072e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.888044e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.720927e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.21E+06 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382704335459282E-002) differ by less than 4E-4 (1.1853587900123586e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3371s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3080s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0291s for 90112 events => throughput is 3.10E+06 events/s +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.038351e+06 ) sec^-1 -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515591296252558E-002) differ by less than 4E-4 (1.1717945325173673e-07) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.341186e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.598530e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382704338101225E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6090s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6085s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.68E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.4120s + [COUNTERS] Fortran Overhead ( 0 ) : 
0.4117s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0003s for 8192 events => throughput is 2.90E+07 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382706077425631E-002) differ by less than 4E-4 (9.988182347875352e-08) +OK! xsec from fortran (9.3382715404661518E-002) and hip (9.3382704338101225E-002) differ by less than 4E-4 (1.1850758729892164e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515591361999701E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7344s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7297s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.95E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.4986s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4956s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 90112 events => throughput is 3.00E+07 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515592892887687E-002) differ by less than 4E-4 (9.973286385633884e-08) +OK! xsec from fortran (9.1515602020000766E-002) and hip (9.1515591361999701E-002) differ by less than 4E-4 (1.1646102771045719e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.546893e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.728619e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.804903e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.078851e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.477327e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.339646e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.060127e+09 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.590578e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = 
SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.389797e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.334954e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.251129e+09 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.676438e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.752691e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.104281e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.481445e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.666333e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index fc86f120db..8961dc7493 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_01:26:29 +DATE: 2024-05-16_20:43:34 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7237s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7153s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5058s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5000s + [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1773s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1355s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1297s + [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.4045s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3124s - [COUNTERS] Fortran MEs ( 1 ) : 0.0920s for 90112 events => throughput is 9.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2769s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2145s + [COUNTERS] Fortran MEs ( 1 ) : 0.0625s for 90112 events => throughput is 1.44E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1967s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1895s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1415s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1356s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.37E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701395E-002) differ by less than 2E-4 (1.7176482458580722e-10) +OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.717646025412023e-10) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,8 +157,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3901s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3110s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0791s for 90112 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2197s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0658s for 90112 events => throughput is 1.37E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +179,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.191141e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.407330e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.200935e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.423508e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,8 +200,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,13 +209,13 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1363s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1328s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.35E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.7176438049659737e-10) +OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.717646025412023e-10) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,8 +233,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3486s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3030s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0456s for 90112 events => throughput is 1.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2558s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2174s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0384s for 90112 events => throughput is 2.35E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +255,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.010123e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.467655e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.071657e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.503347e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,8 +276,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,13 +285,13 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1848s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1334s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1309s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.18E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) +OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484913930753692e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,8 +309,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3402s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3039s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0363s for 90112 events => throughput is 2.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2519s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2224s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0295s for 90112 events => throughput is 3.06E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,92 +331,22 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.462700e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.346047e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.639506e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1850s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.61E+06 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3431s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3079s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0351s for 90112 events => throughput is 2.56E+06 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.422737e+06 ) sec^-1 -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.637002e+06 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.800572e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -428,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715392009222E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1865s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1828s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 8192 events => throughput is 2.21E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4116s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4111s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.77E+07 events/s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) +OK! xsec from fortran (9.3382715404661518E-002) and hip (9.3382715392009222E-002) differ by less than 2E-4 (1.3548862032308762e-10) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -461,143 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3487s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3069s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 90112 events => throughput is 2.16E+06 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.197743e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.278261e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6095s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715392009194E-002) differ by less than 2E-4 (1.3548906441229747e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7369s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7319s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.81E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.4970s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4925s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 90112 events => throughput is 2.02E+07 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602021089631E-002) differ by less than 2E-4 (1.1898038110302878e-11) +OK! xsec from fortran (9.1515602020000766E-002) and hip (9.1515602021089631E-002) differ by less than 2E-4 (1.1898038110302878e-11) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.356139e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.150838e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.953546e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.551007e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.960740e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.214814e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.522141e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.892921e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = 
SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.009432e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.264688e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.090602e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.974527e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.972046e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.187799e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.157381e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.568186e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index e1be7813b6..f8550097b2 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' 
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_01:26:45 +DATE: 2024-05-16_20:43:46 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8221s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7787s - [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6603s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6317s + [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4146s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s - [COUNTERS] Fortran MEs ( 1 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3142s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2856s + [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7473s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2714s - [COUNTERS] Fortran MEs ( 1 ) : 0.4758s for 90112 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3363s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0237s + [COUNTERS] Fortran MEs ( 1 ) : 0.3126s for 90112 events => throughput is 2.88E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4516s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0396s for 8192 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3667s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3346s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.094184803756626) and cpp (47.094184803756640) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.8025s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3670s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4355s for 90112 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4349s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0752s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3598s for 90112 events => throughput is 2.50E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989099) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.132783e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.622701e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.139840e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.626013e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756619] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4032s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3247s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3067s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0179s for 8192 events => throughput is 4.57E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756640) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.094184803756626) and cpp (47.094184803756619) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105695279989085] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6069s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3541s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2528s for 90112 events => throughput is 3.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2437s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0464s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1973s for 90112 events => throughput is 4.57E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989106) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989085) differ by less than 3E-14 (5.551115123125783e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.613591e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.622662e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.632391e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.651962e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3994s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3855s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3091s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2988s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 8192 events => throughput is 7.99E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (47.094184803756626) and cpp (47.094184803756640) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5056s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3497s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1559s for 90112 events => throughput is 5.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.1495s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0366s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1128s for 90112 events => throughput is 7.99E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989114) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.889737e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.257087e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.921310e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3995s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.55E+05 events/s +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.334302e+05 ) sec^-1 -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4813s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3413s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1399s for 90112 events => throughput is 6.44E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.578026e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.673606e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4212s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3991s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5933s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3549s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 90112 events => throughput is 3.78E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.816986e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.814285e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,8 +358,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -513,19 +367,19 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8124s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8118s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5849s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5842s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.16E+07 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cuda (47.094184803756640) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.094184803756626) and hip (47.094184803756640) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,8 +391,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -546,58 +400,56 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7642s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7574s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.34E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.3252s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3174s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.15E+07 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cuda (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (47.105695279989114) and hip (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.120396e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.296184e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.622859e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.023795e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.177398e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.754752e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.080565e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.750038e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.172657e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.773188e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155839e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.952920e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.173872e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.747788e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.068966e+07 ) sec^-1 - -*** 
(3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.148755e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 0b367d2d96..a5639eedd6 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_01:27:12 +DATE: 2024-05-16_20:44:04 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8191s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7751s - [COUNTERS] Fortran MEs ( 1 ) : 0.0439s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5861s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5576s + [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4113s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s - [COUNTERS] Fortran MEs ( 1 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3178s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2892s + [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7478s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2691s - [COUNTERS] Fortran MEs ( 1 ) : 0.4787s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3422s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0298s + [COUNTERS] Fortran MEs ( 1 ) : 0.3124s for 90112 events => throughput is 2.88E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094179780921394] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094178241446492] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4476s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4108s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3434s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3159s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0275s for 8192 events => throughput is 2.98E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094179780921394) differ by less than 4E-4 (1.0665510541407741e-07) +OK! xsec from fortran (47.094184803756626) and cpp (47.094178241446492) differ by less than 4E-4 (1.3934438314322506e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105688579298537] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105686930681671] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7717s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3659s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4058s for 90112 events => throughput is 2.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3644s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0602s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3042s for 90112 events => throughput is 2.96E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105688579298537) differ by less than 4E-4 (1.4224799227413598e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105686930681671) differ by less than 4E-4 (1.7724624157278157e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.257844e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.093104e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.292052e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.093112e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094175850060040] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094176373190514] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4035s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3878s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3157s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3029s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 8192 events => throughput is 6.38E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094175850060040) differ by less than 4E-4 (1.9012318908107062e-07) +OK! xsec from fortran (47.094184803756626) and cpp (47.094176373190514) differ by less than 4E-4 (1.7901501314643298e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105684763984058] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105685173093654] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5166s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3428s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1737s for 90112 events => throughput is 5.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.1839s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0431s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1408s for 90112 events => throughput is 6.40E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105684763984058) differ by less than 4E-4 (2.2324275217311396e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105685173093654) differ by less than 4E-4 (2.1455782361901043e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.218996e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.532441e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.263655e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.807352e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094174474272364] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3915s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3033s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2968s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094173652938650) differ by less than 4E-4 (2.3677696170398832e-07) +OK! xsec from fortran (47.094184803756626) and cpp (47.094174474272364) differ by less than 4E-4 (2.1933672500473733e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105684585116684] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4300s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3376s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0925s for 90112 events => throughput is 9.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.1035s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0342s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0693s for 90112 events => throughput is 1.30E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ by less than 4E-4 (2.384278946498952e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105684585116684) differ by less than 4E-4 (2.2703990176786704e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.896073e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.940492e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3898s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3819s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.094184803756640) and cpp (47.094173652938650) differ by less than 4E-4 (2.3677696170398832e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4168s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3312s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0856s for 90112 events => throughput is 1.05E+06 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ by less than 4E-4 (2.384278946498952e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.032975e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.370067e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.050779e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094178213275804] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3948s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3837s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 8192 events => throughput is 7.41E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.094184803756640) and cpp (47.094178213275804) differ by less than 4E-4 (1.3994256109484127e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.383213e+06 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105688407939567] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4646s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3397s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1249s for 90112 events => throughput is 7.22E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.105695279989114) and cpp (47.105688407939567) differ by less than 4E-4 (1.4588574703822133e-07) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.304914e+05 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.408593e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184344050284] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094176770070867] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8097s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8091s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.50E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5742s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5738s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0004s for 8192 events => throughput is 2.09E+07 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (47.094184803756640) and cuda (47.094184344050284) differ by less than 4E-4 (9.761425112664313e-09) +OK! xsec from fortran (47.094184803756626) and hip (47.094176770070867) differ by less than 4E-4 (1.705876382374072e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105694586476879] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105687115703695] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7654s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7594s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 90112 events => throughput is 1.51E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.3245s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3206s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 90112 events => throughput is 2.32E+07 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cuda (47.105694586476879) differ by less than 4E-4 (1.4722471020078842e-08) +OK! xsec from fortran (47.105695279989114) and hip (47.105687115703695) differ by less than 4E-4 (1.733184357144424e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.397168e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.923699e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.912682e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.089089e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.099083e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.079244e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.785250e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.979394e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.065057e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.082611e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.885291e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.073119e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.649544e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.172672e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.436840e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.364268e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 197f6200da..fd0e858a7b 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_01:27:37 +DATE: 2024-05-16_20:44:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8243s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7810s - [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7637s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7352s + [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4109s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3676s - [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3188s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2901s + [COUNTERS] Fortran MEs ( 1 ) : 0.0288s for 8192 events => throughput is 2.85E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7536s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2749s - [COUNTERS] Fortran MEs ( 1 ) : 0.4787s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3410s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0285s + [COUNTERS] Fortran MEs ( 1 ) : 0.3125s for 90112 events => throughput is 2.88E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,8 +124,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,13 +133,13 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4520s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4121s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3575s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3243s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0332s for 8192 events => throughput is 2.47E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08) +OK! xsec from fortran (47.094184803756626) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428942997143e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,8 +157,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.8139s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3740s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4399s for 90112 events => throughput is 2.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4261s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0618s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3643s for 90112 events => throughput is 2.47E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +179,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006634) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.079375e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.234666e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.079476e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.455754e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186141863908] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4154s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3930s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3252s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0178s for 8192 events => throughput is 4.61E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08) +OK! xsec from fortran (47.094184803756626) and cpp (47.094186141863908) differ by less than 2E-4 (2.8413429165041748e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,8 +233,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5989s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3477s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2513s for 90112 events => throughput is 3.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2519s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0490s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2029s for 90112 events => throughput is 4.44E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +255,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006626) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.535235e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.661337e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.688874e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.698309e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186193208834] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4007s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3868s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3099s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2998s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 8192 events => throughput is 8.15E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) +OK! xsec from fortran (47.094184803756626) and cpp (47.094186193208834) differ by less than 2E-4 (2.9503689491505725e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,114 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105696667630852] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5000s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1530s for 90112 events => throughput is 5.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.1506s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0399s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1108s for 90112 events => throughput is 8.13E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) +OK! xsec from fortran (47.105695279989114) and cpp (47.105696667630852) differ by less than 2E-4 (2.9458046002517335e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.781843e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.449938e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.986506e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4023s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3898s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.55E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.502608e+05 ) sec^-1 -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4677s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3316s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1361s for 90112 events => throughput is 6.62E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.671424e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.782154e+05 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -428,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184798437837] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4150s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3944s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5744s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5737s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.10E+07 events/s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) +OK! 
xsec from fortran (47.094184803756626) and hip (47.094184798437837) differ by less than 2E-4 (1.1293943558143837e-10) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -461,84 +391,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5820s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3527s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2292s for 90112 events => throughput is 3.93E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.998616e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.990048e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8154s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8148s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.094184803756640) and cuda (47.094184798437830) differ by less than 2E-4 (1.1293987967064822e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -546,58 +400,56 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7703s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7635s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 90112 events => throughput is 1.32E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.3718s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3640s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.16E+07 
events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cuda (47.105695279068492) differ by less than 2E-4 (1.954369999168648e-11) +OK! xsec from fortran (47.105695279989114) and hip (47.105695279068492) differ by less than 2E-4 (1.954369999168648e-11) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.143723e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.270245e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.636090e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.032927e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.182886e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.810154e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.066867e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.798767e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.180722e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.802290e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.146460e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.004195e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.158610e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.771337e+07 ) sec^-1 
*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.030823e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.171072e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 7f0ff41464..a6dfae25cd 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:28:04 +DATE: 2024-05-16_20:44:40 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.7020s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3669s - [COUNTERS] Fortran MEs ( 1 ) : 0.3350s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5850s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3824s + [COUNTERS] Fortran MEs ( 1 ) : 0.2026s for 8192 events => throughput is 4.04E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6627s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3273s - [COUNTERS] Fortran MEs ( 1 ) : 0.3354s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4570s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2542s + [COUNTERS] Fortran MEs ( 1 ) : 0.2027s for 8192 events => throughput is 4.04E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.2831s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5823s - [COUNTERS] Fortran MEs ( 1 ) : 3.7008s for 90112 events => throughput is 2.43E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 3.4255s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2010s + [COUNTERS] Fortran MEs ( 1 ) : 2.2245s for 90112 events => throughput is 4.05E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112317668354763] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 1.0127s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3448s for 8192 events => throughput is 2.38E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8439s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5616s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2822s for 8192 events => throughput is 2.90E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317668354763) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.7090s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9162s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7928s for 90112 events => throughput is 2.38E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9239236471252514E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 4.5951s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4889s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.1062s for 90112 events => throughput is 2.90E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239236471252514E-002) differ by less than 3E-14 (5.551115123125783e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.444412e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.979402e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.433186e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.982821e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112317668354515] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5073s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1796s for 8192 events => throughput is 4.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5372s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3981s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1391s for 8192 events => throughput is 5.89E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607748863) differ by less than 3E-14 (2.453592884421596e-14) +OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317668354515) differ by less than 3E-14 (2.475797344914099e-14) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.7766s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7859s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9906s for 90112 events => throughput is 4.53E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9239236471252514E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 2.8700s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3398s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.5302s for 90112 events => throughput is 5.89E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239236471252514E-002) differ by less than 3E-14 (5.551115123125783e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.630294e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.074039e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.629345e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.093000e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112317668354763] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5178s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4262s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 8192 events => throughput is 8.94E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3911s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3256s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0655s for 8192 events => throughput is 1.25E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317668354763) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.7021s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6979s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0042s for 90112 events => throughput is 8.97E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 2.0032s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7234s for 90112 events => throughput is 1.25E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239236471252555E-002) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.244917e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.257103e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.251295e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.258880e+05 ) sec^-1 -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4129s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0824s for 8192 events => throughput is 9.95E+04 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5931s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6918s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9014s for 90112 events => throughput is 1.00E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.033892e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.035720e+05 ) sec^-1 +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5700s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4516s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1184s for 8192 events => throughput is 6.92E+04 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.0137s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7172s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2965s for 90112 events => throughput is 6.95E+04 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.144179e+04 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.156532e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112317668354760] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7726s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7672s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.5780s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5704s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748607749111) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.10112317668354764) and hip (0.10112317668354760) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0397s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0164s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.87E+06 events/s + [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 1.6103s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5267s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0836s for 90112 events => throughput is 1.08E+06 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481932717736E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (7.9239236471252555E-002) and hip (7.9239236471252555E-002) differ by less than 3E-14 (0.0) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.642318e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.120771e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.930638e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.148193e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.882259e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.680474e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.244433e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.302244e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.893041e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.676513e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.255841e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.840744e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.907568e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.666133e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.774192e+06 ) sec^-1 - 
-*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.440619e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 1a8c36aa43..724ff0291a 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:28:48 +DATE: 2024-05-16_20:45:11 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.7057s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3678s - [COUNTERS] Fortran MEs ( 1 ) : 0.3379s for 8192 events => throughput is 2.42E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4828s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2806s + [COUNTERS] Fortran MEs ( 1 ) : 0.2022s for 8192 events => throughput is 4.05E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6650s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3295s - [COUNTERS] Fortran MEs ( 1 ) : 0.3355s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4601s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2576s + [COUNTERS] Fortran MEs ( 1 ) : 0.2025s for 8192 events => throughput is 4.04E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.3113s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5981s - [COUNTERS] Fortran MEs ( 1 ) : 3.7132s for 90112 events => throughput is 2.43E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 3.4225s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1981s + [COUNTERS] Fortran MEs ( 1 ) : 2.2244s for 90112 events => throughput is 4.05E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112722621426752] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112291597608296] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9881s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6537s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3344s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7804s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5209s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2595s for 8192 events => throughput is 3.16E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112722621426752) differ by less than 4E-4 (2.569659680817793e-06) +OK! xsec from fortran (0.10112317668354764) and cpp (0.10112291597608296) differ by less than 4E-4 (2.5781178285555484e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238468310179624E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.5951s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9082s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6869s for 90112 events => throughput is 2.44E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9239221732791437E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 4.3172s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4654s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8518s for 90112 events => throughput is 3.16E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238468310179624E-002) differ by less than 4E-4 (1.719182115555995e-07) +OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239221732791437E-002) differ by less than 4E-4 (1.8599953477416165e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.531268e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.253346e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.536337e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.249434e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112720710186394] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112290421591680] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5295s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4297s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0998s for 8192 events => throughput is 8.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3421s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0790s for 8192 events => throughput is 1.04E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112720710186394) differ by less than 4E-4 (2.758652844936371e-06) +OK! xsec from fortran (0.10112317668354764) and cpp (0.10112290421591680) differ by less than 4E-4 (2.6944132867079418e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238454786658835E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.7829s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6780s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1050s for 90112 events => throughput is 8.16E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9239212368085274E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 2.1138s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2809s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8329s for 90112 events => throughput is 1.08E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238454786658835E-002) differ by less than 4E-4 (3.4258681169685445e-07) +OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239212368085274E-002) differ by less than 4E-4 (3.0418222529693395e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.418674e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.092129e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.378970e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.100966e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112291415112837] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4226s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3763s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3350s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3009s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0341s for 8192 events => throughput is 2.41E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721766950902) differ by less than 4E-4 (2.654154597325764e-06) +OK! xsec from fortran (0.10112317668354764) and cpp (0.10112291415112837) differ by less than 4E-4 (2.5961646764605106e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.1354s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6236s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5118s for 90112 events => throughput is 1.76E+05 events/s + [XSECTION] Cross section = 0.07924 [7.9239211617250407E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 1.6114s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2370s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3743s for 90112 events => throughput is 2.41E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002) differ by less than 4E-4 (3.5585866953180556e-07) +OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239211617250407E-002) differ by less than 4E-4 (3.136577692020026e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.814902e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.818224e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.10112748607749111) and cpp (0.10112721766950902) differ by less than 4E-4 (2.654154597325764e-06) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0774s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6180s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4593s for 90112 events => throughput is 1.96E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002) differ by less than 4E-4 (3.5585866953180556e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.016267e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.453476e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.024583e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.457462e+05 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112723387847480] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3831s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0562s for 8192 events => throughput is 1.46E+05 events/s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112723387847480) differ by less than 4E-4 (2.4938721023826105e-06) +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238464410949921E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.2433s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6283s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6150s for 90112 events => throughput is 1.47E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238464410949921E-002) differ by less than 4E-4 (2.211270000440635e-07) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.471448e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.496104e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112726034625694] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112292787307366] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7661s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7652s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6376s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6355s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.98E+06 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cuda (0.10112726034625694) differ by less than 4E-4 (2.2321452152196386e-06) +OK! xsec from fortran (0.10112317668354764) and hip (0.10112292787307366) differ by less than 4E-4 (2.4604693221741414e-06) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238473828077680E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0217s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0116s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 90112 events => throughput is 8.94E+06 events/s + [XSECTION] Cross section = 0.07924 [7.9239222545537072E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 1.5598s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5376s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 90112 events => throughput is 4.06E+06 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238473828077680E-002) differ by less than 4E-4 (1.0228161673175862e-07) +OK! 
xsec from fortran (7.9239236471252555E-002) and hip (7.9239222545537072E-002) differ by less than 4E-4 (1.7574267630049434e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.279804e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.596097e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.849139e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.521296e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.708780e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.472410e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.376255e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.085314e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.741880e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.469858e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.526731e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.637900e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.576787e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.425874e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.628936e+07 
) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.261945e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 06cc385635..eb563a53e4 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +make USEBUILDDIR=1 BACKEND=hip - -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:29:27 +DATE: 2024-05-16_20:45:39 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.7042s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3668s - [COUNTERS] Fortran MEs ( 1 ) : 0.3374s for 8192 events => throughput is 2.43E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4826s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2802s + [COUNTERS] Fortran MEs ( 1 ) : 0.2024s for 8192 events => throughput is 4.05E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6678s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3296s - [COUNTERS] Fortran MEs ( 1 ) : 0.3383s for 8192 events => throughput is 2.42E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4610s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2587s + [COUNTERS] Fortran MEs ( 1 ) : 0.2023s for 8192 events => throughput is 4.05E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.3035s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5927s - [COUNTERS] Fortran MEs ( 1 ) : 3.7109s for 90112 events => throughput is 2.43E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 3.4431s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2093s + [COUNTERS] Fortran MEs ( 1 ) : 2.2338s for 90112 events => throughput is 4.03E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112317761225882] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 1.0211s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6702s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3509s for 8192 events => throughput is 2.33E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8361s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5448s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2912s for 8192 events => throughput is 2.81E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748700702684) differ by less than 2E-4 (9.191721828116783e-09) +OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317761225882) differ by less than 2E-4 (9.183959592817814e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.7816s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9205s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8611s for 90112 events => throughput is 2.33E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9239237217958461E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 4.7112s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5198s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.1914s for 90112 events => throughput is 2.82E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482679400354E-002) differ by less than 2E-4 (9.423232416594374e-09) +OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239237217958461E-002) differ by less than 2E-4 (9.4234364755863e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.398467e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.898005e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.396772e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.864631e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112317763556192] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6812s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5038s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1774s for 8192 events => throughput is 4.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5299s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3925s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1374s for 8192 events => throughput is 5.96E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748702805033) differ by less than 2E-4 (9.399612865834683e-09) +OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317763556192) differ by less than 2E-4 (9.41440236879032e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.6988s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7524s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9464s for 90112 events => throughput is 4.63E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9239237221421968E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 2.8737s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3594s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.5143s for 90112 events => throughput is 5.95E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482683055667E-002) differ by less than 2E-4 (9.469362849401364e-09) +OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239237221421968E-002) differ by less than 2E-4 (9.467145956065792e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.777911e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.146873e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.770421e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.162589e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112317741957558] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5041s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4149s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0892s for 8192 events => throughput is 9.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3914s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3254s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0660s for 8192 events => throughput is 1.24E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09) +OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317741957558) differ by less than 2E-4 (7.278528668663853e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.6526s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6680s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9846s for 90112 events => throughput is 9.15E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9239237072275287E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 2.0008s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2746s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7262s for 90112 events => throughput is 1.24E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09) +OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239237072275287E-002) differ by less than 2E-4 (7.584913142011374e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.374488e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.304457e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4867s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4079s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0788s for 8192 events => throughput is 1.04E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5251s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6551s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8699s for 90112 events => throughput is 1.04E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.072957e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.274026e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.074127e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.274917e+05 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5672s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4475s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1197s for 8192 events => throughput is 6.85E+04 events/s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748700265108) differ by less than 2E-4 (9.148451995955043e-09) +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.0147s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7041s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3105s for 90112 events => throughput is 6.88E+04 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482666076374E-002) differ by less than 2E-4 (9.255082034087536e-09) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.810756e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.935663e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112317662375726] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7757s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.5947s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5871s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748601943165) differ by less than 2E-4 (5.74121417074025e-10) +OK! xsec from fortran (0.10112317668354764) and hip (0.10112317662375726) differ by less than 2E-4 (5.9126292750733e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0407s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0176s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 90112 events => throughput is 3.91E+06 events/s + [XSECTION] Cross section = 0.07924 [7.9239236476482192E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1899 events (found 1904 events) + [COUNTERS] PROGRAM TOTAL : 1.5831s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4995s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0837s for 90112 events => throughput is 1.08E+06 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481937154381E-002) differ by less than 2E-4 (5.5991211667105745e-11) +OK! 
xsec from fortran (7.9239236471252555E-002) and hip (7.9239236476482192E-002) differ by less than 2E-4 (6.599809587726213e-11) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.631069e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.119462e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.120692e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.147711e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.856212e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.673894e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.234939e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.302029e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.866138e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.681043e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.243613e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.840526e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.862499e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.662870e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.731505e+06 
) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.434320e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 744dd47e66..99da4b0a3f 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:30:11 +DATE: 2024-05-16_20:46:11 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.8074s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s - [COUNTERS] Fortran MEs ( 1 ) : 4.4427s for 8192 events => throughput is 1.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.8627s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3581s + [COUNTERS] Fortran MEs ( 1 ) : 2.5046s for 8192 events => throughput is 3.27E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.8076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3603s - [COUNTERS] Fortran MEs ( 1 ) : 4.4473s for 8192 events => throughput is 1.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7716s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2816s + [COUNTERS] Fortran MEs ( 1 ) : 2.4900s for 8192 events => throughput is 3.29E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 50.3676s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1555s - [COUNTERS] Fortran MEs ( 1 ) : 48.2121s for 90112 events => throughput is 1.87E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793532411914656E-004] fbridge_mode=0 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 29.0583s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5955s + [COUNTERS] Fortran MEs ( 1 ) : 27.4629s for 90112 events => throughput is 3.28E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102372E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704143122579739E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 9.2196s - [COUNTERS] Fortran Overhead ( 0 ) : 4.7253s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4943s for 8192 events => throughput is 1.82E+03 events/s + [COUNTERS] PROGRAM TOTAL : 7.8333s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0232s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.8101s for 8192 events => throughput is 2.15E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102372E-004) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143122579739E-004) differ by less than 3E-14 (6.661338147750939e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451704E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 56.2683s - [COUNTERS] Fortran Overhead ( 0 ) : 6.5201s - [COUNTERS] CudaCpp MEs ( 2 ) : 49.7482s for 90112 events => throughput is 1.81E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793532411914648E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 47.4261s + [COUNTERS] Fortran Overhead ( 0 ) : 5.3494s + [COUNTERS] CudaCpp MEs ( 2 ) : 42.0767s for 90112 events => throughput is 2.14E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451704E-004) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532411914648E-004) differ by less than 3E-14 (5.551115123125783e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.868635e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.170234e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.874481e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.210789e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704143122579739E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 5.0549s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6552s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3997s for 8192 events => throughput is 3.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.5775s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9100s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.6674s for 8192 events => throughput is 4.91E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143122579739E-004) differ by less than 3E-14 (6.661338147750939e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451701E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 30.7729s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4162s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.3568s for 90112 events => throughput is 3.42E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793532411914648E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 21.5173s + [COUNTERS] Fortran Overhead ( 0 ) : 3.2215s + [COUNTERS] CudaCpp MEs ( 2 ) : 18.2958s for 90112 events => throughput is 4.93E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451701E-004) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532411914648E-004) differ by less than 3E-14 (5.551115123125783e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.598914e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.067248e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.606768e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.067497e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704143122579728E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.4142s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3728s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0413s for 8192 events => throughput is 7.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6695s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9666s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7029s for 8192 events => throughput is 1.17E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143122579728E-004) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 14.6025s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1268s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.4757s for 90112 events => throughput is 7.85E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793532411914648E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 10.0128s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2748s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.7380s for 90112 events => throughput is 1.16E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) +OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532411914648E-004) differ by less than 3E-14 (5.551115123125783e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.082204e+03 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.087591e+03 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.1594s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9156s for 8192 events => throughput is 8.95E+03 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 13.1097s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0095s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.1002s for 90112 events => throughput is 8.92E+03 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.157056e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.193787e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.190937e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.204524e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7205s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5345s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1860s for 8192 events => throughput is 6.91E+03 events/s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 16.4021s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3045s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.0976s for 90112 events => throughput is 6.88E+03 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.943395e+03 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.968493e+03 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704143122579723E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9037s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8707s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9424s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8292s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1132s for 8192 events => throughput is 7.24E+04 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (3.8704143122579712E-004) and hip (3.8704143122579723E-004) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 2.9957s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6318s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3638s for 90112 events => throughput is 2.48E+05 events/s + [XSECTION] Cross section = 0.0001579 [1.5793532411914653E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 3.3277s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0786s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2491s for 90112 events => throughput is 7.21E+04 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793438642451712E-004) differ by less than 3E-14 (0.0) +OK! 
xsec from fortran (1.5793532411914656E-004) and hip (1.5793532411914653E-004) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.275863e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.211368e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.513394e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.451928e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.126700e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.244902e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.163753e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.041753e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.128674e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.233879e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.183392e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.227863e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.130320e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.245185e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 
1.455396e+05 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.390943e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 97726609cd..0b8c0a4e92 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' 
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:34:39 +DATE: 2024-05-16_20:49:52 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.7506s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3591s - [COUNTERS] Fortran MEs ( 1 ) : 4.3916s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7635s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2735s + [COUNTERS] Fortran MEs ( 1 ) : 2.4899s for 8192 events => throughput is 3.29E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.7327s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3562s - [COUNTERS] Fortran MEs ( 1 ) : 4.3765s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7627s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2744s + [COUNTERS] Fortran MEs ( 1 ) : 2.4883s for 8192 events => throughput is 3.29E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 50.4568s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1771s - [COUNTERS] Fortran MEs ( 1 ) : 48.2797s for 90112 events => throughput is 1.87E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793532411914656E-004] fbridge_mode=0 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 28.9764s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6009s + [COUNTERS] Fortran MEs ( 1 ) : 27.3755s for 90112 events => throughput is 3.29E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703729438336302E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704259755238570E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 8.9135s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5714s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3421s for 8192 events => throughput is 1.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 6.9987s + [COUNTERS] Fortran Overhead ( 0 ) : 3.6137s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.3850s for 8192 events => throughput is 2.42E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703729438336302E-004) differ by less than 4E-4 (3.021119383106452e-06) +OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704259755238570E-004) differ by less than 4E-4 (3.0134411834747965e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793486626492658E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 54.1190s - [COUNTERS] Fortran Overhead ( 0 ) : 6.3214s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.7976s for 90112 events => throughput is 1.89E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793580182117605E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 42.1241s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8988s + [COUNTERS] CudaCpp MEs ( 2 ) : 37.2252s for 90112 events => throughput is 2.42E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486626492658E-004) differ by less than 4E-4 (3.0382263187522796e-06) +OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793580182117605E-004) differ by less than 4E-4 (3.024668687290344e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.947180e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.473943e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.947353e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.477038e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703722581317850E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704254541054809E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7184s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5261s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1923s for 8192 events => throughput is 6.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9623s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1144s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8479s for 8192 events => throughput is 9.66E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722581317850E-004) differ by less than 4E-4 (2.843951981690296e-06) +OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704254541054809E-004) differ by less than 4E-4 (2.8787221757475834e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793483759856148E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 16.5068s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3022s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.2046s for 90112 events => throughput is 6.82E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793578161882866E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 11.6999s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4337s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.2662s for 90112 events => throughput is 9.72E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483759856148E-004) differ by less than 4E-4 (2.856718252175483e-06) +OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793578161882866E-004) differ by less than 4E-4 (2.896753368286653e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.978396e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.973910e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.983793e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.975004e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704254166302247E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.4122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8788s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5334s for 8192 events => throughput is 1.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9782s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6223s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3560s for 8192 events => throughput is 2.30E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722425602170E-004) differ by less than 4E-4 (2.8399286962077497e-06) +OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704254166302247E-004) differ by less than 4E-4 (2.8690396836061893e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 8.5565s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6677s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.8888s for 90112 events => throughput is 1.53E+04 events/s + [XSECTION] Cross section = 0.0001579 [1.5793578009696313E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 5.8301s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9300s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.9001s for 90112 events => throughput is 2.31E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004) differ by less than 4E-4 (2.852825495613942e-06) +OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793578009696313E-004) differ by less than 4E-4 (2.887117363403746e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.577584e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.577489e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.2916s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8193s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4723s for 8192 events => throughput is 1.73E+04 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722425602170E-004) differ by less than 4E-4 (2.8399286962077497e-06) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 7.7633s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5957s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.1676s for 90112 events => throughput is 1.74E+04 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004) differ by less than 4E-4 (2.852825495613942e-06) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.818661e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.390345e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.824534e+04 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703728658657426E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.5274s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9418s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5856s for 8192 events => throughput is 1.40E+04 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703728658657426E-004) differ by less than 4E-4 (3.0009745224379714e-06) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.382982e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793486977281547E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 9.1749s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6987s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.4761s for 90112 events => throughput is 1.39E+04 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486977281547E-004) differ by less than 4E-4 (3.0604373708609245e-06) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.413533e+04 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.415193e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703736267486325E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704261630635685E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.8657s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8443s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7928s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7366s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0562s for 8192 events => throughput is 1.46E+05 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (3.8703612510102356E-004) and cuda (3.8703736267486325E-004) differ by less than 4E-4 (3.1975667371675343e-06) +OK! xsec from fortran (3.8704143122579712E-004) and hip (3.8704261630635685E-004) differ by less than 4E-4 (3.0618958697381515e-06) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793489323670813E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 2.8423s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6065s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2358s for 90112 events => throughput is 3.82E+05 events/s + [XSECTION] Cross section = 0.0001579 [1.5793580869662166E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 2.6145s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9943s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6203s for 90112 events => throughput is 1.45E+05 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793489323670813E-004) differ by less than 4E-4 (3.20900471706409e-06) +OK! xsec from fortran (1.5793532411914656E-004) and hip (1.5793580869662166E-004) differ by less than 4E-4 (3.0682019858119247e-06) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.583101e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.469589e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.931306e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.787102e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.570292e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.703222e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.724382e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.298408e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.573894e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.698763e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.720622e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.028870e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.518798e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.692814e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.538253e+05 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.381352e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 9161616d22..c20ccea3fc 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:38:10 +DATE: 2024-05-16_20:52:51 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.7461s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3596s - [COUNTERS] Fortran MEs ( 1 ) : 4.3864s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7671s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2738s + [COUNTERS] Fortran MEs ( 1 ) : 2.4933s for 8192 events => throughput is 3.29E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.7384s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3570s - [COUNTERS] Fortran MEs ( 1 ) : 4.3814s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7707s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2809s + [COUNTERS] Fortran MEs ( 1 ) : 2.4899s for 8192 events => throughput is 3.29E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 50.4226s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1630s - [COUNTERS] Fortran MEs ( 1 ) : 48.2596s for 90112 events => throughput is 1.87E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793532411914656E-004] fbridge_mode=0 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 28.9717s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5892s + [COUNTERS] Fortran MEs ( 1 ) : 27.3825s for 90112 events => throughput is 3.29E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612659176674E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704143272044121E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 9.4283s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8187s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.6095s for 8192 events => throughput is 1.78E+03 events/s + [COUNTERS] PROGRAM TOTAL : 7.8348s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0029s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.8319s for 8192 events => throughput is 2.14E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612659176674E-004) differ by less than 2E-4 (3.851690077993908e-09) +OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143272044121E-004) differ by less than 2E-4 (3.861716058040088e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438704534934E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 56.9207s - [COUNTERS] Fortran Overhead ( 0 ) : 6.6165s - [COUNTERS] CudaCpp MEs ( 2 ) : 50.3042s for 90112 events => throughput is 1.79E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793532474032691E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 47.5780s + [COUNTERS] Fortran Overhead ( 0 ) : 5.3115s + [COUNTERS] CudaCpp MEs ( 2 ) : 42.2665s for 90112 events => throughput is 2.13E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438704534934E-004) differ by less than 2E-4 (3.930950231989527e-09) +OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532474032691E-004) differ by less than 2E-4 (3.933131154099101e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.845749e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.211221e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.855084e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.210147e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612692816703E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704143304774347E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 5.0818s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6854s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3963s for 8192 events => throughput is 3.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.5369s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8882s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.6487s for 8192 events => throughput is 4.97E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612692816703E-004) differ by less than 2E-4 (4.720860369289426e-09) +OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143304774347E-004) differ by less than 2E-4 (4.707367828871156e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438707226035E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 30.7891s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4469s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.3422s for 90112 events => throughput is 3.42E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793532476698221E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 21.2864s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1918s + [COUNTERS] CudaCpp MEs ( 2 ) : 18.0946s for 90112 events => throughput is 4.98E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438707226035E-004) differ by less than 2E-4 (4.1013439311399225e-09) +OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532476698221E-004) differ by less than 2E-4 (4.101904815811963e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.522464e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.124073e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.523204e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.105635e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704143287857844E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.3962s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3623s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0339s for 8192 events => throughput is 7.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6358s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9511s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6847s for 8192 events => throughput is 1.20E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) +OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143287857844E-004) differ by less than 2E-4 (4.2702956726259345e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 14.5036s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1328s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.3708s for 90112 events => throughput is 7.92E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793532473043530E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 9.7635s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2583s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.5053s for 90112 events => throughput is 1.20E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) +OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532473043530E-004) differ by less than 2E-4 (3.870500364655527e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.125066e+03 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.074324e+03 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.1502s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2343s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9159s for 8192 events => throughput is 8.94E+03 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 12.9975s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0012s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.9962s for 90112 events => throughput is 9.01E+03 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.351869e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.234140e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.208773e+03 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7414s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5394s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2020s for 8192 events => throughput is 6.82E+03 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.234326e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 16.5337s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3003s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.2334s for 90112 events => throughput is 6.81E+03 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.881370e+03 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.943865e+03 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612512203166E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8704143124638075E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9014s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8685s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 8192 events => throughput is 2.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9328s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8187s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1141s for 8192 events => throughput is 7.18E+04 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (3.8703612510102356E-004) and cuda (3.8703612512203166E-004) differ by less than 2E-4 (5.427946980773868e-11) +OK! xsec from fortran (3.8704143122579712E-004) and hip (3.8704143124638075E-004) differ by less than 2E-4 (5.318190332559425e-11) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642387717E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 2.9899s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6262s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3637s for 90112 events => throughput is 2.48E+05 events/s + [XSECTION] Cross section = 0.0001579 [1.5793532411887058E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1358 events (found 1880 events) + [COUNTERS] PROGRAM TOTAL : 3.3216s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0701s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2516s for 90112 events => throughput is 7.20E+04 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793438642387717E-004) differ by less than 2E-4 (4.051980972974434e-12) +OK! xsec from fortran (1.5793532411914656E-004) and hip (1.5793532411887058E-004) differ by less than 2E-4 (1.7474910407599964e-12) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.280457e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.192995e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.523385e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.462358e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.121733e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.248895e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.162091e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.020189e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.125438e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.244252e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168234e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.225383e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.125929e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.244523e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.451563e+05 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.379421e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index f87c8c9cf1..193cac0b99 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_01:43:56 +DATE: 2024-05-16_20:42:12 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.7235s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5049s - [COUNTERS] Fortran MEs ( 1 ) : 101.2186s for 8192 events => throughput is 8.09E+01 events/s + [COUNTERS] PROGRAM TOTAL : 54.5168s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3652s + [COUNTERS] Fortran MEs ( 1 ) : 54.1516s for 8192 events => throughput is 1.51E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.7703s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5109s - [COUNTERS] Fortran MEs ( 1 ) : 101.2594s for 8192 events => throughput is 8.09E+01 events/s + [COUNTERS] PROGRAM TOTAL : 54.6451s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3696s + [COUNTERS] Fortran MEs ( 1 ) : 54.2755s for 8192 events => throughput is 1.51E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322783648085419E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1119.4272s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4297s - [COUNTERS] Fortran MEs ( 1 ) : 1114.9976s for 90112 events => throughput is 8.08E+01 events/s + [COUNTERS] PROGRAM TOTAL : 600.0134s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0257s + [COUNTERS] Fortran MEs ( 1 ) : 596.9877s for 90112 events => throughput is 1.51E+02 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403628942015001E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 236.4243s - [COUNTERS] Fortran Overhead ( 0 ) : 108.7156s - [COUNTERS] CudaCpp MEs ( 2 ) : 127.7087s for 8192 events => throughput is 6.41E+01 events/s + [COUNTERS] PROGRAM TOTAL : 175.8983s + [COUNTERS] Fortran Overhead ( 0 ) : 80.1134s + [COUNTERS] CudaCpp MEs ( 2 ) : 95.7848s for 8192 events => throughput is 8.55E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939193E-006) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403628942015001E-006) differ by less than 3E-14 (2.4424906541753444e-15) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322783648085453E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1507.2346s - [COUNTERS] Fortran Overhead ( 0 ) : 112.2012s - [COUNTERS] CudaCpp MEs ( 2 ) : 1395.0334s for 90112 events => throughput is 6.46E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1134.7119s + [COUNTERS] Fortran Overhead ( 0 ) : 82.6410s + [COUNTERS] CudaCpp MEs ( 2 ) : 1052.0709s for 90112 events => throughput is 8.57E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) +OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783648085453E-007) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.611342e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.033544e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.612518e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.031382e+02 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403628942015003E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 114.8253s - [COUNTERS] Fortran Overhead ( 0 ) : 52.9129s - [COUNTERS] CudaCpp MEs ( 2 ) : 61.9124s for 8192 events => throughput is 1.32E+02 events/s + [COUNTERS] PROGRAM TOTAL : 81.8493s + [COUNTERS] Fortran Overhead ( 0 ) : 36.9991s + [COUNTERS] CudaCpp MEs ( 2 ) : 44.8502s for 8192 events => throughput is 1.83E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939197E-006) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403628942015003E-006) differ by less than 3E-14 (2.6645352591003757e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322783648085448E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 734.6001s - [COUNTERS] Fortran Overhead ( 0 ) : 56.8950s - [COUNTERS] CudaCpp MEs ( 2 ) : 677.7051s for 90112 events => throughput is 1.33E+02 events/s + [COUNTERS] PROGRAM TOTAL : 531.7480s + [COUNTERS] Fortran Overhead ( 0 ) : 39.7110s + [COUNTERS] CudaCpp MEs ( 2 ) : 492.0370s for 90112 events => throughput is 1.83E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656017E-007) differ by less than 3E-14 (2.220446049250313e-15) +OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783648085448E-007) differ by less than 3E-14 (1.3322676295501878e-15) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.573216e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.254594e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.570652e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.249864e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403628942015001E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 53.5594s - [COUNTERS] Fortran Overhead ( 0 ) : 24.8692s - [COUNTERS] CudaCpp MEs ( 2 ) : 28.6902s for 8192 events => throughput is 2.86E+02 events/s + [COUNTERS] PROGRAM TOTAL : 35.4827s + [COUNTERS] Fortran Overhead ( 0 ) : 16.2185s + [COUNTERS] CudaCpp MEs ( 2 ) : 19.2643s for 8192 events => throughput is 4.25E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403628942015001E-006) differ by less than 3E-14 (2.4424906541753444e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322783648085445E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 345.8816s - [COUNTERS] Fortran Overhead ( 0 ) : 28.6165s - [COUNTERS] CudaCpp MEs ( 2 ) : 317.2651s for 90112 events => throughput is 2.84E+02 events/s + [COUNTERS] PROGRAM TOTAL : 231.8248s + [COUNTERS] Fortran Overhead ( 0 ) : 18.8357s + [COUNTERS] CudaCpp MEs ( 2 ) : 212.9890s for 90112 events => throughput is 4.23E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) +OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783648085445E-007) differ by less than 3E-14 (1.1102230246251565e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.346027e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.397864e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 47.5433s - [COUNTERS] Fortran Overhead ( 0 ) : 21.7991s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.7442s for 8192 events => throughput is 3.18E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 310.2994s - [COUNTERS] Fortran Overhead ( 0 ) : 25.7446s - [COUNTERS] CudaCpp MEs ( 2 ) : 284.5548s for 90112 events => throughput is 3.17E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.866314e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.171079e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.859864e+02 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 50.4926s - [COUNTERS] Fortran Overhead ( 0 ) : 24.7479s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.7447s for 8192 events => throughput is 3.18E+02 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.226924e+02 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 313.6701s - [COUNTERS] Fortran Overhead ( 0 ) : 28.5625s - [COUNTERS] CudaCpp MEs ( 2 ) : 285.1075s for 90112 events => throughput is 3.16E+02 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.394651e+02 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.384790e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403628942015003E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 4.2708s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1879s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0829s for 8192 events => throughput is 7.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 11.7059s + [COUNTERS] Fortran Overhead ( 0 ) : 7.8191s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.8867s for 8192 events => throughput is 2.11E+03 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (1.2403628942014972E-006) and hip (1.2403628942015003E-006) differ by less than 3E-14 (2.6645352591003757e-15) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322783648085437E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 19.1407s - [COUNTERS] Fortran Overhead ( 0 ) : 7.1896s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9511s for 90112 events => throughput is 7.54E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.7736s + [COUNTERS] Fortran Overhead ( 0 ) : 10.2335s + [COUNTERS] CudaCpp MEs ( 2 ) : 42.5400s for 90112 events => throughput is 2.12E+03 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993086656006E-007) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (2.3322783648085419E-007) and hip (2.3322783648085437E-007) differ by less than 3E-14 (8.881784197001252e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.518899e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.152133e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.266687e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.191143e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.285867e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.527095e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.577065e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.455198e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.302340e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.518810e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.485177e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.486020e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.239249e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.521294e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.236704e+03 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.125470e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 9938780c0a..a47883d8b6 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +make USEBUILDDIR=1 BACKEND=hip - - -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_03:16:41 +DATE: 2024-05-16_21:37:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.6786s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5071s - [COUNTERS] Fortran MEs ( 1 ) : 101.1715s for 8192 events => throughput is 8.10E+01 events/s + [COUNTERS] PROGRAM TOTAL : 54.4965s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3691s + [COUNTERS] Fortran MEs ( 1 ) : 54.1274s for 8192 events => throughput is 1.51E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 102.1420s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5108s - [COUNTERS] Fortran MEs ( 1 ) : 101.6312s for 8192 events => throughput is 8.06E+01 events/s + [COUNTERS] PROGRAM TOTAL : 54.3426s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3756s + [COUNTERS] Fortran MEs ( 1 ) : 53.9670s for 8192 events => throughput is 1.52E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322783648085419E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1119.6489s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4200s - [COUNTERS] Fortran MEs ( 1 ) : 1115.2289s for 90112 events => throughput is 8.08E+01 events/s + [COUNTERS] PROGRAM TOTAL : 598.8564s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0254s + [COUNTERS] Fortran MEs ( 1 ) : 595.8311s for 90112 events => throughput is 1.51E+02 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,23 +124,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405719957040752E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405363572559468E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 207.9761s - [COUNTERS] Fortran Overhead ( 0 ) : 95.5518s - [COUNTERS] CudaCpp MEs ( 2 ) : 112.4243s for 8192 events => throughput is 7.29E+01 events/s + [COUNTERS] PROGRAM TOTAL : 162.8380s + [COUNTERS] Fortran Overhead ( 0 ) : 74.6711s + [COUNTERS] CudaCpp MEs ( 2 ) : 88.1669s for 8192 events => throughput is 9.29E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719957040752E-006) differ by less than 4E-4 (0.00013985256106807675) +OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2405363572559468E-006) differ by less than 4E-4 (0.00013984863241267576) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -158,37 +158,37 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326290771198648E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326080615569212E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1342.0233s - [COUNTERS] Fortran Overhead ( 0 ) : 99.5419s - [COUNTERS] CudaCpp MEs ( 2 ) : 1242.4814s for 90112 events => throughput is 7.25E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1045.1681s + [COUNTERS] Fortran Overhead ( 0 ) : 77.2023s + [COUNTERS] CudaCpp MEs ( 2 ) : 967.9658s for 90112 events => throughput is 9.31E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326290771198648E-007) differ by less than 4E-4 (0.00014139199589124907) +OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3326080615569212E-007) differ by less than 4E-4 (0.00014136252059526733) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.627892e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.108327e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.617246e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.104949e+02 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,23 +202,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405717007921116E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405361288903015E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 52.4912s - [COUNTERS] Fortran Overhead ( 0 ) : 24.8093s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.6819s for 8192 events => throughput is 2.96E+02 events/s + [COUNTERS] PROGRAM TOTAL : 39.8100s + [COUNTERS] Fortran Overhead ( 0 ) : 18.3061s + [COUNTERS] CudaCpp MEs ( 2 ) : 21.5039s for 8192 events => throughput is 3.81E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405717007921116E-006) differ by less than 4E-4 (0.00013961480525170877) +OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2405361288903015E-006) differ by less than 4E-4 (0.0001396645204514435) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -236,37 +236,37 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326284900828787E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326076878598447E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 333.5578s - [COUNTERS] Fortran Overhead ( 0 ) : 28.7441s - [COUNTERS] CudaCpp MEs ( 2 ) : 304.8137s for 90112 events => throughput is 2.96E+02 events/s + [COUNTERS] PROGRAM TOTAL : 256.1064s + [COUNTERS] Fortran Overhead ( 0 ) : 21.0308s + [COUNTERS] CudaCpp MEs ( 2 ) : 235.0755s for 90112 events => throughput is 3.83E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326284900828787E-007) differ by less than 4E-4 (0.00014114029707035236) +OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3326076878598447E-007) differ by less than 4E-4 (0.00014120229226155523) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.354565e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.607312e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.352919e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.620181e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -280,23 +280,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405360895331841E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 26.9353s - [COUNTERS] Fortran Overhead ( 0 ) : 12.5805s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.3549s for 8192 events => throughput is 5.71E+02 events/s + [COUNTERS] PROGRAM TOTAL : 18.0753s + [COUNTERS] Fortran Overhead ( 0 ) : 8.3409s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.7343s for 8192 events => throughput is 8.42E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716659252656E-006) differ by less than 4E-4 (0.00013958669586155992) +OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2405360895331841E-006) differ by less than 4E-4 (0.00013963279012663143) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -314,195 +314,45 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326069099562333E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 175.5244s - [COUNTERS] Fortran Overhead ( 0 ) : 16.6137s - [COUNTERS] CudaCpp MEs ( 2 ) : 158.9107s for 90112 events => throughput is 5.67E+02 events/s + [COUNTERS] PROGRAM TOTAL : 117.1292s + [COUNTERS] Fortran Overhead ( 0 ) : 11.0028s + [COUNTERS] CudaCpp MEs ( 2 ) : 106.1264s for 90112 events => throughput is 8.49E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007) differ by less than 4E-4 (0.00014080311959907554) +OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3326069099562333E-007) differ by less than 4E-4 (0.00014086875419705436) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.796719e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.813418e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 24.0035s - [COUNTERS] Fortran Overhead ( 0 ) : 11.1136s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8899s for 8192 events => throughput is 6.36E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716659252656E-006) differ by less than 4E-4 (0.00013958669586155992) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 155.2962s - [COUNTERS] Fortran Overhead ( 0 ) : 15.0900s - [COUNTERS] CudaCpp MEs ( 2 ) : 140.2063s for 90112 events => throughput is 6.43E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007) differ by less than 4E-4 (0.00014080311959907554) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.795563e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.038176e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.781198e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.039143e+03 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405719306052570E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 25.3167s - [COUNTERS] Fortran Overhead ( 0 ) : 12.6125s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.7041s for 8192 events => throughput is 6.45E+02 events/s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719306052570E-006) differ by less than 4E-4 (0.00013980007888836354) +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326283660088769E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 157.3576s - [COUNTERS] Fortran Overhead ( 0 ) : 16.6484s - [COUNTERS] CudaCpp MEs ( 2 ) : 140.7092s for 90112 events => throughput is 6.40E+02 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326283660088769E-007) differ by less than 4E-4 (0.00014108709892313165) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.841550e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.830427e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -514,28 +364,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405722175509512E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405363557292459E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 2.5511s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0591s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4920s for 8192 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.2636s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4305s + [COUNTERS] CudaCpp MEs ( 2 ) 
: 1.8331s for 8192 events => throughput is 4.47E+03 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2405722175509512E-006) differ by less than 4E-4 (0.00014003141235829908) +OK! xsec from fortran (1.2403628942014972E-006) and hip (1.2405363557292459E-006) differ by less than 4E-4 (0.00013984740156258724) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -547,67 +397,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326296967941821E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326074784076956E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 11.4353s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0032s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4321s for 90112 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 27.0950s + [COUNTERS] Fortran Overhead ( 0 ) : 6.8776s + [COUNTERS] CudaCpp MEs ( 2 ) : 20.2175s for 90112 events => throughput is 4.46E+03 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3326296967941821E-007) differ by less than 4E-4 (0.0001416576883412901) +OK! xsec from fortran (2.3322783648085419E-007) and hip (2.3326074784076956E-007) differ by less than 4E-4 (0.00014111248645076735) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.630624e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.435238e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.646596e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.502637e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.329013e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.290043e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.359221e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.455740e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.329144e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.270194e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.339287e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.000099e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.285838e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.276751e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.423096e+03 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.072297e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 9cddd5fe7c..8b9cb02837 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_04:26:13 +DATE: 2024-05-16_22:21:51 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.9892s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5028s - [COUNTERS] Fortran MEs ( 1 ) : 101.4864s for 8192 events => throughput is 8.07E+01 events/s + [COUNTERS] PROGRAM TOTAL : 54.5583s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3675s + [COUNTERS] Fortran MEs ( 1 ) : 54.1908s for 8192 events => throughput is 1.51E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.7400s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5055s - [COUNTERS] Fortran MEs ( 1 ) : 101.2345s for 8192 events => throughput is 8.09E+01 events/s + [COUNTERS] PROGRAM TOTAL : 54.5792s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4048s + [COUNTERS] Fortran MEs ( 1 ) : 54.1744s for 8192 events => throughput is 1.51E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322783648085419E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1119.6356s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4254s - [COUNTERS] Fortran MEs ( 1 ) : 1115.2102s for 90112 events => throughput is 8.08E+01 events/s + [COUNTERS] PROGRAM TOTAL : 597.8493s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0318s + [COUNTERS] Fortran MEs ( 1 ) : 594.8175s for 90112 events => throughput is 1.51E+02 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403629013416990E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 229.4020s - [COUNTERS] Fortran Overhead ( 0 ) : 103.2152s - [COUNTERS] CudaCpp MEs ( 2 ) : 126.1868s for 8192 events => throughput is 6.49E+01 events/s + [COUNTERS] PROGRAM TOTAL : 175.7021s + [COUNTERS] Fortran Overhead ( 0 ) : 80.5963s + [COUNTERS] CudaCpp MEs ( 2 ) : 95.1058s for 8192 events => throughput is 8.61E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985299359844E-006) differ by less than 2E-4 (5.7578810608305275e-09) +OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403629013416990E-006) differ by less than 2E-4 (5.7565425759520394e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322783773791503E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1528.1049s - [COUNTERS] Fortran Overhead ( 0 ) : 113.9982s - [COUNTERS] CudaCpp MEs ( 2 ) : 1414.1067s for 90112 events => throughput is 6.37E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1136.0885s + [COUNTERS] Fortran Overhead ( 0 ) : 83.3490s + [COUNTERS] CudaCpp MEs ( 2 ) : 1052.7395s for 90112 events => throughput is 8.56E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993212353001E-007) differ by less than 2E-4 (5.389404034161771e-09) +OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783773791503E-007) differ by less than 2E-4 (5.389840573855054e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.425842e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.026349e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.948869e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.024407e+02 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403629009850969E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 117.4242s - [COUNTERS] Fortran Overhead ( 0 ) : 53.6967s - [COUNTERS] CudaCpp MEs ( 2 ) : 63.7275s for 8192 events => throughput is 1.29E+02 events/s + [COUNTERS] PROGRAM TOTAL : 79.2661s + [COUNTERS] Fortran Overhead ( 0 ) : 35.2128s + [COUNTERS] CudaCpp MEs ( 2 ) : 44.0533s for 8192 events => throughput is 1.86E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985295828471E-006) differ by less than 2E-4 (5.473184350179849e-09) +OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403629009850969E-006) differ by less than 2E-4 (5.469044328521022e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322783784120318E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 756.7451s - [COUNTERS] Fortran Overhead ( 0 ) : 57.7650s - [COUNTERS] CudaCpp MEs ( 2 ) : 698.9802s for 90112 events => throughput is 1.29E+02 events/s + [COUNTERS] PROGRAM TOTAL : 520.4130s + [COUNTERS] Fortran Overhead ( 0 ) : 37.9407s + [COUNTERS] CudaCpp MEs ( 2 ) : 482.4723s for 90112 events => throughput is 1.87E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222645653E-007) differ by less than 2E-4 (5.830713245558172e-09) +OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783784120318E-007) differ by less than 2E-4 (5.832704319530535e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.540886e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.333934e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.526888e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.350214e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403629007633195E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 51.3666s - [COUNTERS] Fortran Overhead ( 0 ) : 23.6472s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.7194s for 8192 events => throughput is 2.96E+02 events/s + [COUNTERS] PROGRAM TOTAL : 34.4462s + [COUNTERS] Fortran Overhead ( 0 ) : 15.3748s + [COUNTERS] CudaCpp MEs ( 2 ) : 19.0714s for 8192 events => throughput is 4.30E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) +OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403629007633195E-006) differ by less than 2E-4 (5.290244020628165e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322783783946155E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 334.4015s - [COUNTERS] Fortran Overhead ( 0 ) : 27.7321s - [COUNTERS] CudaCpp MEs ( 2 ) : 306.6693s for 90112 events => throughput is 2.94E+02 events/s + [COUNTERS] PROGRAM TOTAL : 227.1590s + [COUNTERS] Fortran Overhead ( 0 ) : 17.9408s + [COUNTERS] CudaCpp MEs ( 2 ) : 209.2182s for 90112 events => throughput is 4.31E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) +OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783783946155E-007) differ by less than 2E-4 (5.825236737422301e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.517938e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.525606e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 44.9641s - [COUNTERS] Fortran Overhead ( 0 ) : 20.5328s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.4313s for 8192 events => throughput is 3.35E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 294.2131s - [COUNTERS] Fortran Overhead ( 0 ) : 24.5260s - [COUNTERS] CudaCpp MEs ( 2 ) : 269.6871s for 90112 events => throughput is 3.34E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.118767e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.444423e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.099496e+02 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 49.6272s - [COUNTERS] Fortran Overhead ( 0 ) : 23.9735s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.6537s for 8192 events => throughput is 3.19E+02 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.457069e+02 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 308.1385s - [COUNTERS] Fortran Overhead ( 0 ) : 28.1125s - [COUNTERS] CudaCpp MEs ( 2 ) : 280.0260s for 90112 events => throughput is 3.22E+02 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.494217e+02 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.498492e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403628931370709E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 3.6127s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7479s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8648s for 8192 events => throughput is 9.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.3847s + [COUNTERS] Fortran Overhead ( 0 ) : 8.1395s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2452s for 8192 events => throughput is 1.93E+03 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985217419736E-006) differ by less than 2E-4 (8.480691704448873e-10) +OK! xsec from fortran (1.2403628942014972E-006) and hip (1.2403628931370709E-006) differ by less than 2E-4 (8.581571009358413e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322783640044522E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 16.2177s - [COUNTERS] Fortran Overhead ( 0 ) : 6.7356s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4821s for 90112 events => throughput is 9.50E+03 events/s + [COUNTERS] PROGRAM TOTAL : 57.3104s + [COUNTERS] Fortran Overhead ( 0 ) : 10.5772s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.7332s for 90112 events => throughput is 1.93E+03 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993078576733E-007) differ by less than 2E-4 (3.464063480507207e-10) +OK! xsec from fortran (2.3322783648085419E-007) and hip (2.3322783640044522E-007) differ by less than 2E-4 (3.447657714872321e-10) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.422089e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.938523e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.074505e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.964520e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108350e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.238719e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.160591e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.382190e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110190e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.292322e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.113742e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.191145e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.112799e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.289765e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.647292e+03 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.081215e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index c909267a2d..345e75f604 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_01:42:38 +DATE: 2024-05-16_20:56:31 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4817s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4067s - [COUNTERS] Fortran MEs ( 1 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4335s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s + [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4101s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3363s - [COUNTERS] Fortran MEs ( 1 ) : 0.0738s for 8192 events => throughput is 1.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3071s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2590s + [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3997s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5919s - [COUNTERS] Fortran MEs ( 1 ) : 0.8079s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7403s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2151s + [COUNTERS] Fortran MEs ( 1 ) : 0.5252s for 90112 events => throughput is 1.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4922s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4133s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0789s for 8192 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4230s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3528s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0702s for 8192 events => throughput is 1.17E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263335) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226551166922) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561293] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510679754343820] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.5284s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6490s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8794s for 90112 events => throughput is 1.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0612s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2886s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7726s for 90112 events => throughput is 1.17E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561293) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679754343820) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038604e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.206967e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.042402e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.209538e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351262530] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110226551166122] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4203s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3775s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3320s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2981s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 8192 events => throughput is 2.41E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351262530) differ by less than 3E-14 (2.9531932455029164e-14) +OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226551166122) differ by less than 3E-14 (2.9531932455029164e-14) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0947s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6149s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4799s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6268s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2529s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3739s for 90112 events => throughput is 2.41E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561281) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679754343823) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.949373e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.436246e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.919896e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.444759e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3846s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3601s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2992s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2817s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.69E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226551166922) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,114 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8777s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6020s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2756s for 90112 events => throughput is 3.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4300s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2395s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1905s for 90112 events => throughput is 4.73E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679754343823) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.197689e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.830686e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.341628e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3821s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3591s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.794875e+05 ) sec^-1 -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8414s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5939s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2475s for 90112 events => throughput is 3.64E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.543777e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.667139e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -428,176 +358,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4059s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0340s for 8192 events => throughput is 2.41E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0051s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6219s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3832s for 90112 events => throughput is 2.35E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.349902e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.363260e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7762s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7756s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.27110539351263330) and cuda (0.27110539351263363) differ by less than 3E-14 (1.3322676295501878e-15) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0116s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0036s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 90112 events => throughput is 1.13E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686556561304) differ by less than 3E-14 (4.440892098500626e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.582112e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.121699e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.529721e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.531629e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.538677e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.807286e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.530045e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.783374e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** - -TEST COMPLETED +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Memory access fault by GPU node-4 (Agent handle: 0x63ef410) on address 0x1476e6b2a000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x147a7d01b90f in ??? +#1 0x147a7ce58d2b in ??? +#2 0x147a7ce5a3e4 in ??? +#3 0x147a74421b64 in ??? +#4 0x147a7441eb38 in ??? +#5 0x147a743dc496 in ??? +#6 0x147a7d00f6e9 in ??? +#7 0x147a7cf2649e in ??? +#8 0xffffffffffffffff in ??? +./madX.sh: line 379: 45053 Aborted $timecmd $cmd < ${tmpin} > ${tmp} +ERROR! ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed + PDF set = nn23lo1 + alpha_s(Mz)= 0.1300 running at 2 loops. + alpha_s(Mz)= 0.1300 running at 2 loops. 
+ Renormalization scale set on event-by-event basis + Factorization scale set on event-by-event basis + + + getting user params +Enter number of events and max and min iterations: + Number of events and iterations 8192 1 1 diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 4ac5ec3dc1..1ca71b10fc 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_13:45:22 +DATE: 2024-05-16_20:56:44 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4107s - [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3556s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3075s + [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4161s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s - [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3109s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2628s + [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4361s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6155s - [COUNTERS] Fortran MEs ( 1 ) : 0.8206s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7401s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2147s + [COUNTERS] Fortran MEs ( 1 ) : 0.5255s for 90112 events => throughput is 1.71E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110463093540638] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110149549279866] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4907s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4145s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0762s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3780s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3208s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0572s for 8192 events => throughput is 1.43E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110463093540638) differ by less than 4E-4 (2.812844174915341e-06) +OK! xsec from fortran (0.27110226551166922) and cpp (0.27110149549279866) differ by less than 4E-4 (2.840326210895583e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686273216112] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510678843355344] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.5059s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6664s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8394s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8985s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2738s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6247s for 90112 events => throughput is 1.44E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686273216112) differ by less than 4E-4 (1.3172298474195543e-08) +OK! xsec from fortran (0.21510679754343823) and cpp (0.21510678843355344) differ by less than 4E-4 (4.2350520312872675e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.092440e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.471741e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.094603e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.471384e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110459152958460] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110146988852984] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3934s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3043s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2842s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 8192 events => throughput is 4.08E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110459152958460) differ by less than 4E-4 (2.9581965829139634e-06) +OK! xsec from fortran (0.27110226551166922) and cpp (0.27110146988852984) differ by less than 4E-4 (2.934771267448788e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510683016166510] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510676993136629] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9232s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6228s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3004s for 90112 events => throughput is 3.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4572s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2368s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2204s for 90112 events => throughput is 4.09E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510683016166510) differ by less than 4E-4 (1.6458771667782202e-07) +OK! xsec from fortran (0.21510679754343823) and cpp (0.21510676993136629) differ by less than 4E-4 (1.2836447871311663e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.039638e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.255154e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.034015e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.282301e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110148793566186] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3713s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2823s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2727s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 8192 events => throughput is 8.48E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460595003461) differ by less than 4E-4 (2.9050052766654844e-06) +OK! xsec from fortran (0.27110226551166922) and cpp (0.27110148793566186) differ by less than 4E-4 (2.8682018052839098e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,114 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510676419088856] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7502s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6011s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1490s for 90112 events => throughput is 6.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3362s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2299s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1064s for 90112 events => throughput is 8.47E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ by less than 4E-4 (1.8848637739488083e-07) +OK! xsec from fortran (0.21510679754343823) and cpp (0.21510676419088856) differ by less than 4E-4 (1.5505111905511626e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.923571e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.680750e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.853443e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3638s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3514s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 8192 events => throughput is 6.61E+05 events/s +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.758171e+05 ) sec^-1 -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460595003461) differ by less than 4E-4 (2.9050052766654844e-06) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7444s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6060s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1385s for 90112 events => throughput is 6.51E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ by less than 4E-4 (1.8848637739488083e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.334346e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.476144e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -428,178 +358,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110464176080312] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3762s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3583s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0178s for 8192 events => throughput is 4.59E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110464176080312) differ by less than 4E-4 (2.772913590631809e-06) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510685411522326] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8096s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6129s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1967s for 90112 events => throughput is 4.58E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510685411522326) differ by less than 4E-4 (5.3231167917999755e-08) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.750606e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.766894e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110478167944563] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7793s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7788s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.48E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.27110539351263330) and cuda (0.27110478167944563) differ by less than 4E-4 (2.2568093527297606e-06) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510689885789414] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0413s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0348s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689885789414) differ by less than 4E-4 (1.547708907700951e-07) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.566939e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.326602e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.593547e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.720103e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.619232e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.806222e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.144615e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.016256e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** - -TEST COMPLETED +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Memory access fault by GPU node-4 (Agent handle: 0x63ef430) on address 0x145d8d504000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x14612399790f in ??? +#1 0x1461237d4d2b in ??? +#2 0x1461237d63e4 in ??? +#3 0x14611ad9db64 in ??? +#4 0x14611ad9ab38 in ??? +#5 0x14611ad58496 in ??? +#6 0x14612398b6e9 in ??? +#7 0x1461238a249e in ??? +#8 0xffffffffffffffff in ??? +./madX.sh: line 379: 46236 Aborted $timecmd $cmd < ${tmpin} > ${tmp} +ERROR! ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed + PDF set = nn23lo1 + alpha_s(Mz)= 0.1300 running at 2 loops. + alpha_s(Mz)= 0.1300 running at 2 loops. 
+ Renormalization scale set on event-by-event basis + Factorization scale set on event-by-event basis + + + getting user params +Enter number of events and max and min iterations: + Number of events and iterations 8192 1 1 diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 23f8d1233a..0b8f6a32df 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_01:43:25 +DATE: 2024-05-16_20:56:57 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s - [COUNTERS] Fortran MEs ( 1 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3529s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3048s + [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4171s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3424s - [COUNTERS] Fortran MEs ( 1 ) : 0.0747s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3100s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2620s + [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4382s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6232s - [COUNTERS] Fortran MEs ( 1 ) : 0.8150s for 90112 events => throughput is 1.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7393s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2140s + [COUNTERS] Fortran MEs ( 1 ) : 0.5252s for 90112 events => throughput is 1.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110226549005623] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.5038s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4232s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 8192 events => throughput is 1.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4032s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3341s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0692s for 8192 events => throughput is 1.18E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539348916002) differ by less than 2E-4 (8.658362915525686e-11) +OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226549005623) differ by less than 2E-4 (7.972267290767832e-11) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686560794337] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510679758658835] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.5722s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6823s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8899s for 90112 events => throughput is 1.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0516s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2896s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7620s for 90112 events => throughput is 1.18E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794337) differ by less than 2E-4 (1.967879192932287e-10) +OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679758658835) differ by less than 2E-4 (2.0059864880295208e-10) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.030982e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.200794e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.031715e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.203149e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110226549005628] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4234s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3811s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3316s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2985s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0331s for 8192 events => throughput is 2.47E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539348916002) differ by less than 2E-4 (8.658362915525686e-11) +OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226549005628) differ by less than 2E-4 (7.972245086307339e-11) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686560794334] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510679758658832] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.1135s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6453s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4682s for 90112 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6178s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3650s for 90112 events => throughput is 2.47E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794334) differ by less than 2E-4 (1.9678769724862377e-10) +OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679758658832) differ by less than 2E-4 (2.0059842675834716e-10) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.925046e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.461318e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.926756e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.467281e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110226530029391] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3913s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3660s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 8192 events => throughput is 3.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3025s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2850s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 8192 events => throughput is 4.70E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330272815) differ by less than 2E-4 (7.742566587864985e-10) +OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226530029391) differ by less than 2E-4 (7.796884249344771e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,114 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510679756340242] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8962s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6193s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2769s for 90112 events => throughput is 3.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4301s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2388s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1913s for 90112 events => throughput is 4.71E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) +OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679756340242) differ by less than 2E-4 (9.281064805577444e-11) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.266154e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.695225e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.269502e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3831s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3610s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s +Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.795904e+05 ) sec^-1 -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330272815) differ by less than 2E-4 (7.742566587864985e-10) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8418s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5992s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2426s for 90112 events => throughput is 3.71E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.789065e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.854777e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -428,177 +358,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4096s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3737s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0358s for 8192 events => throughput is 2.29E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330272815) differ by less than 2E-4 (7.742566587864985e-10) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0275s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6170s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4105s for 90112 events => throughput is 2.20E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.335204e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.335548e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7724s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.27110539351263330) and cuda (0.27110539343558537) differ by less than 2E-4 (2.8419910869104115e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0091s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0010s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.12E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686553631395) differ by less than 2E-4 (1.3620671257541517e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.632055e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.046951e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.534300e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.533151e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.529948e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.832656e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.532773e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.792603e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** - -TEST COMPLETED +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Memory access fault by GPU node-4 (Agent handle: 0x63ef410) on address 0x149212efa000. Reason: Page not present or supervisor privilege. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x1495a93d390f in ??? +#1 0x1495a9210d2b in ??? +#2 0x1495a92123e4 in ??? +#3 0x1495a07d9b64 in ??? +#4 0x1495a07d6b38 in ??? +#5 0x1495a0794496 in ??? +#6 0x1495a93c76e9 in ??? +#7 0x1495a92de49e in ??? +#8 0xffffffffffffffff in ??? +./madX.sh: line 379: 47427 Aborted $timecmd $cmd < ${tmpin} > ${tmp} +ERROR! ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed + PDF set = nn23lo1 + alpha_s(Mz)= 0.1300 running at 2 loops. + alpha_s(Mz)= 0.1300 running at 2 loops. 
+ Renormalization scale set on event-by-event basis + Factorization scale set on event-by-event basis + + + getting user params +Enter number of events and max and min iterations: + Number of events and iterations 8192 1 1 diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 7b51bb9221..b976ca012f 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:58:47 +DATE: 2024-05-16_21:01:14 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9450s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8961s - [COUNTERS] Fortran MEs ( 1 ) : 0.0489s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0496s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0170s + [COUNTERS] Fortran MEs ( 1 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4283s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3794s - [COUNTERS] Fortran MEs ( 1 ) : 0.0489s for 8192 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5952s + [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 + [XSECTION] Cross section = 2.043 [2.0434895240377502] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8593s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3225s - [COUNTERS] Fortran MEs ( 1 ) : 0.5368s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.7668s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4120s + [COUNTERS] Fortran MEs ( 1 ) : 0.3548s for 90112 events => throughput is 2.54E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256197] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4564s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4132s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6698s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0353s for 8192 events => throughput is 2.32E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256148) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955499256197) differ by less than 3E-14 (4.6629367034256575e-15) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8316s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4781s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.8230s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4359s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3870s for 90112 events => throughput is 2.33E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895240377569) differ by less than 3E-14 (3.3306690738754696e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.935091e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.414981e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.955688e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.407163e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256205] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4208s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3966s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6333s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6138s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0194s for 8192 events => throughput is 4.21E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256161) differ by less than 3E-14 (0.0) +OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955499256205) differ by less than 3E-14 (4.218847493575595e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6048s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3350s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2698s for 90112 events => throughput is 3.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.6418s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4278s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2140s for 90112 events => throughput is 4.21E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895240377569) differ by less than 3E-14 (3.3306690738754696e-15) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.396840e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.251157e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.435183e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.285398e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256205] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4008s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3863s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6167s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6059s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.59E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955499256205) differ by less than 3E-14 (4.218847493575595e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,114 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5134s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3430s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1705s for 90112 events => throughput is 5.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.5279s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4098s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1181s for 90112 events => throughput is 7.63E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895240377564) differ by less than 3E-14 (3.1086244689504383e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.293625e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.922831e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.214602e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3999s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.10E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.953876e+05 ) sec^-1 -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4652s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3183s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1469s for 90112 events => throughput is 6.13E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ by less than 3E-14 (4.440892098500626e-16) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.964595e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.873725e+05 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -428,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256223] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4159s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3941s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8891s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8881s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 7.92E+06 events/s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16) +OK! 
xsec from fortran (2.0162955499256290) and hip (2.0162955499256223) differ by less than 3E-14 (3.3306690738754696e-15) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -461,143 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377577] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5767s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3320s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2446s for 90112 events => throughput is 3.68E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ by less than 3E-14 (4.440892098500626e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + [COUNTERS] PROGRAM TOTAL : 4.6970s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6858s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 90112 events => throughput is 8.10E+06 events/s -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.692127e+05 ) sec^-1 +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.804832e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8067s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8061s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.36E+07 events/s +OK! xsec from fortran (2.0434895240377502) and hip (2.0434895240377577) differ by less than 3E-14 (3.774758283725532e-15) -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955499256165) differ by less than 3E-14 (2.220446049250313e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7545s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7475s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 90112 events => throughput is 1.29E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0434895240377569) and cuda (2.0434895240377573) differ by less than 3E-14 (2.220446049250313e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.804334e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.522443e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.230995e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.937728e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.136468e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.540081e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.810257e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.483834e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.115441e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.539468e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.048562e+08 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.497797e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.115501e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.500298e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.748994e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.024295e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index d09b81d7d3..f1ee5d8619 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:59:14 +DATE: 2024-05-16_21:01:52 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9433s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8939s - [COUNTERS] Fortran MEs ( 1 ) : 0.0495s for 8192 events => throughput is 1.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9753s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9428s + [COUNTERS] Fortran MEs ( 1 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4256s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3772s - [COUNTERS] Fortran MEs ( 1 ) : 0.0484s for 8192 events => throughput is 1.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6342s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6017s + [COUNTERS] Fortran MEs ( 1 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 + [XSECTION] Cross section = 2.043 [2.0434895240377502] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8568s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3199s - [COUNTERS] Fortran MEs ( 1 ) : 0.5369s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.7687s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4136s + [COUNTERS] Fortran MEs ( 1 ) : 0.3551s for 90112 events => throughput is 2.54E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,43 +124,43 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162897371946169] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162897089316618] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1625 events) - [COUNTERS] PROGRAM TOTAL : 0.4525s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4113s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6580s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6273s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0307s for 8192 events => throughput is 2.67E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162897371946169) differ by less than 4E-4 (2.8828764708777044e-06) +OK! xsec from fortran (2.0162955499256290) and cpp (2.0162897089316618) differ by less than 4E-4 (2.8968937452189536e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! 
-diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 6206,6207c6206,6207 -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01 0.59936081260E+01 0.00000000000E+00 0. -1. -< 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304676E+03 0.48080583916E+03 0.47000000000E+01 0. 1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081269E+01 0.59936081269E+01 0.00000000000E+00 0. -1. +< 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304668E+03 0.48080583909E+03 0.47000000000E+01 0. 1. --- -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01 0.59936081260E+01 0.00000000000E+00 0. 1. -> 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304676E+03 0.48080583916E+03 0.47000000000E+01 0. -1. +> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081269E+01 0.59936081269E+01 0.00000000000E+00 0. 1. +> 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304668E+03 0.48080583909E+03 0.47000000000E+01 0. -1. 8306,8307c8306,8307 -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997239E+02 0.23857997239E+02 0.00000000000E+00 0. 1. -< 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496682E+02 0.51504607743E+02 0.47000000000E+01 0. -1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997230E+02 0.23857997230E+02 0.00000000000E+00 0. 1. +< 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496701E+02 0.51504607748E+02 0.47000000000E+01 0. -1. 
--- -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997239E+02 0.23857997239E+02 0.00000000000E+00 0. -1. -> 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496682E+02 0.51504607743E+02 0.47000000000E+01 0. 1. +> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997230E+02 0.23857997230E+02 0.00000000000E+00 0. -1. +> 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496701E+02 0.51504607748E+02 0.47000000000E+01 0. 1. 9606,9619d9605 < 4 1 1E-03 0.1250139E+03 0.7546771E-02 0.1235066E+00 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.94948250004E+03 0.94948250004E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.41149990002E+01 0.41149990002E+01 0.00000000000E+00 0. -1. -< 5 1 1 2 501 0 -0.96459450317E+01 -0.34409175043E+02 0.83136584965E+02 0.90613560477E+02 0.47000000000E+01 0. -1. -< -5 1 1 2 0 501 0.96459450317E+01 0.34409175043E+02 0.86223091608E+03 0.86298393857E+03 0.47000000000E+01 0. 1. +< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.94948249861E+03 0.94948249861E+03 0.00000000000E+00 0. 1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.41149990064E+01 0.41149990064E+01 0.00000000000E+00 0. -1. +< 5 1 1 2 501 0 -0.96459450317E+01 -0.34409175043E+02 0.83136584828E+02 0.90613560351E+02 0.47000000000E+01 0. -1. +< -5 1 1 2 0 501 0.96459450317E+01 0.34409175043E+02 0.86223091477E+03 0.86298393726E+03 0.47000000000E+01 0. 1. 
< < 0 0.12501391E+03 diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 291c38991b..fb923ad0e1 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:59:20 +DATE: 2024-05-16_21:02:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9570s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9073s - [COUNTERS] Fortran MEs ( 1 ) : 0.0498s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9762s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9437s + [COUNTERS] Fortran MEs ( 1 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4201s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3715s - [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6290s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5966s + [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 + [XSECTION] Cross section = 2.043 [2.0434895240377502] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8553s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3195s - [COUNTERS] Fortran MEs ( 1 ) : 0.5358s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.7727s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4178s + [COUNTERS] Fortran MEs ( 1 ) : 0.3549s for 90112 events => throughput is 2.54E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,23 +124,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955975931003] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4604s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4161s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0443s for 8192 events => throughput is 1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6634s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6288s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0346s for 8192 events => throughput is 2.37E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955975930954) differ by less than 2E-4 (2.3641117063988304e-08) +OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955975931003) differ by less than 2E-4 (2.364111328923002e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -158,23 +158,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895706383704] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8327s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4793s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.8210s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4405s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3805s for 90112 events => throughput is 2.37E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895706383660) differ by less than 2E-4 (2.2804427679545825e-08) +OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895706383704) differ by less than 2E-4 (2.2804433230660948e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -182,15 +182,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.817766e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.380153e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.799752e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.394562e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,23 +204,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955975931007] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4227s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3979s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6350s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6162s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0188s for 8192 events => throughput is 4.36E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955975930958) differ by less than 2E-4 (2.364111728603291e-08) +OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955975931007) differ by less than 2E-4 (2.3641113511274625e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -238,23 +238,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895706383709] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6020s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3321s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2699s for 90112 events => throughput is 3.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.6291s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4234s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2057s for 90112 events => throughput is 4.38E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895706383669) differ by less than 2E-4 (2.2804428123635034e-08) +OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895706383709) differ by less than 2E-4 (2.2804433452705553e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -262,15 +262,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.208317e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.439993e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.242147e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.453301e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,23 +284,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955953691122] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4018s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6162s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6054s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.64E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953691082) differ by less than 2E-4 (2.253811048902321e-08) +OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955953691122) differ by less than 2E-4 (2.2538106270175717e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -318,23 +318,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895701243891] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4998s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3319s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1679s for 90112 events => throughput is 5.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.5291s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4114s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1177s for 90112 events => throughput is 7.65E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701243878) differ by less than 2E-4 (2.255290776354002e-08) +OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895701243891) differ by less than 2E-4 (2.255291176034291e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -342,177 +342,23 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.916130e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.911254e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4012s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3876s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.01E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953691082) differ by less than 2E-4 (2.253811048902321e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4747s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3226s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1520s for 90112 events => throughput is 5.93E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701243878) differ by less than 2E-4 (2.255290776354002e-08) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.307270e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.910321e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.162265e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4179s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3948s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.55E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953691082) differ by less than 2E-4 (2.253811048902321e-08) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5949s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3393s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2556s for 90112 events => throughput is 3.53E+05 events/s +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.972724e+05 ) sec^-1 -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701243878) differ by less than 2E-4 (2.255290776354002e-08) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.332654e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.349365e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,28 +370,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955503257880] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8124s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8118s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.37E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.9201s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9190s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 7.94E+06 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955503257827) differ by less than 2E-4 (1.9846613241725208e-10) +OK! xsec from fortran (2.0162955499256290) and hip (2.0162955503257880) differ by less than 2E-4 (1.9846257970357328e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -557,67 +403,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895242795763] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7514s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7444s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 90112 events => throughput is 1.29E+07 events/s + [COUNTERS] PROGRAM TOTAL : 4.7432s + [COUNTERS] Fortran Overhead ( 0 ) : 4.7321s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 90112 events => throughput is 8.11E+06 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cuda (2.0434895242795732) differ by less than 2E-4 (1.183348974365117e-10) +OK! 
xsec from fortran (2.0434895240377502) and hip (2.0434895242795763) differ by less than 2E-4 (1.1833978241782006e-10) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.815756e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.302413e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.247313e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.958478e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.109600e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.540431e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.657132e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.493504e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.111219e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.548564e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.039865e+08 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.486657e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.111200e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.512117e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 
2.771983e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.031534e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 80269e77b1..f1b855b6c4 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cpp512y -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_06:00:18 +DATE: 2024-05-16_21:03:04 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.6807s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3392s - [COUNTERS] Fortran MEs ( 1 ) : 2.3415s for 8192 events => throughput is 3.50E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.7341s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3493s + [COUNTERS] Fortran MEs ( 1 ) : 1.3848s for 8192 events => throughput is 5.92E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.6731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3387s - [COUNTERS] Fortran MEs ( 1 ) : 2.3344s for 8192 events => throughput is 3.51E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6406s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2557s + [COUNTERS] Fortran MEs ( 1 ) : 1.3848s for 8192 events => throughput is 5.92E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 27.7347s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8315s - [COUNTERS] Fortran MEs ( 1 ) : 25.9031s for 90112 events => throughput is 3.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.5982s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3360s + [COUNTERS] Fortran MEs ( 1 ) : 15.2622s for 90112 events => throughput is 5.90E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084349E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.3198s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7672s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5526s for 8192 events => throughput is 3.21E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3730s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3359s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.0371s for 8192 events => throughput is 4.02E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896697955084349E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 31.9550s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2173s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.7377s for 90112 events => throughput is 3.25E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.7765s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3566s + [COUNTERS] CudaCpp MEs ( 2 ) : 22.4199s for 90112 events => throughput is 4.02E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438187E-007) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668083551438230E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.410579e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.087419e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.412797e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.086030e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084317E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.9225s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6149s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3076s for 8192 events => throughput is 6.27E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.0686s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1665s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9021s for 8192 events => throughput is 9.08E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084412E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896697955084317E-007) differ by less than 3E-14 (6.661338147750939e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438272E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 17.4719s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0716s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.4004s for 90112 events => throughput is 6.26E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.1684s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2328s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.9355s for 90112 events => throughput is 9.07E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438230E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668083551438272E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.486190e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.295119e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.480385e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.298168e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.4971s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9161s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5810s for 8192 events => throughput is 1.41E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0616s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6636s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3980s for 8192 events => throughput is 2.06E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896697955084359E-007) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.7948s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3698s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.4250s for 90112 events => throughput is 1.40E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.0991s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7348s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3643s for 90112 events => throughput is 2.06E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668083551438261E-007) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.446474e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.453779e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.3443s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8339s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5104s for 8192 events => throughput is 1.60E+04 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.0840s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3189s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7651s for 90112 events => throughput is 1.56E+04 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.657111e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.119923e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.653778e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.109682e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.7114s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0187s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6926s for 8192 events => throughput is 1.18E+04 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 10.1275s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5284s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.5992s for 90112 events => throughput is 1.19E+04 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.225114e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.220944e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8326s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8156s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7082s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0500s for 8192 events => throughput is 1.64E+05 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.9896697955084370E-007) and hip (7.9896697955084359E-007) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438272E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.4753s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2868s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1885s for 90112 events => throughput is 4.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2748s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7270s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5478s for 90112 events => throughput is 1.65E+05 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! 
xsec from fortran (7.6668083551438261E-007) and hip (7.6668083551438272E-007) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.843300e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.667995e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.218089e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.660148e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.155842e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 4.214770e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.421655e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.754199e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.190444e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.210493e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.415946e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.219583e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.148629e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.211111e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.764175e+05 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.134286e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 45b154f6da..ab8828dfcd 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_06:02:57 +DATE: 2024-05-16_21:04:57 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.6694s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s - [COUNTERS] Fortran MEs ( 1 ) : 2.3294s for 8192 events => throughput is 3.52E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6428s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2578s + [COUNTERS] Fortran MEs ( 1 ) : 1.3850s for 8192 events => throughput is 5.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.6748s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3390s - [COUNTERS] Fortran MEs ( 1 ) : 2.3358s for 8192 events => throughput is 3.51E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6611s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2709s + [COUNTERS] Fortran MEs ( 1 ) : 1.3902s for 8192 events => throughput is 5.89E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 27.4771s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8059s - [COUNTERS] Fortran MEs ( 1 ) : 25.6712s for 90112 events => throughput is 3.51E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.5795s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3451s + [COUNTERS] Fortran MEs ( 1 ) : 15.2345s for 90112 events => throughput is 5.92E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896785213255034E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896781657409323E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.0871s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6795s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4076s for 8192 events => throughput is 3.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.9914s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1011s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8903s for 8192 events => throughput is 4.33E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896785213255034E-007) differ by less than 4E-4 (1.0921373827521563e-06) +OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896781657409323E-007) differ by less than 4E-4 (1.0476318432761644e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668138359550833E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668135917139758E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 30.6384s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1272s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.5112s for 90112 events => throughput is 3.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 23.8342s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1725s + [COUNTERS] CudaCpp MEs ( 2 ) : 20.6617s for 90112 events => throughput is 4.36E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668138359550833E-007) differ by less than 4E-4 (7.148752136920677e-07) +OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668135917139758E-007) differ by less than 4E-4 (6.830182661676787e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.518646e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.506422e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.519140e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.499843e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896768403674554E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.6928s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0131s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6797s for 8192 events => throughput is 1.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7417s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4783s for 8192 events => throughput is 1.71E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896766542858863E-007) differ by less than 4E-4 (8.584556829838164e-07) +OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896768403674554E-007) differ by less than 4E-4 (8.817459542509454e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668124704275563E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.9205s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4645s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.4560s for 90112 events => throughput is 1.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.0708s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8105s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.2603s for 90112 events => throughput is 1.71E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668121906848987E-007) differ by less than 4E-4 (5.002787206720427e-07) +OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668124704275563E-007) differ by less than 4E-4 (5.367662185840061e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.232964e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.747376e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.231409e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.759961e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896764767194441E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9294s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6306s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2988s for 8192 events => throughput is 2.74E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6792s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4730s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2062s for 8192 events => throughput is 3.97E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896764408326359E-007) differ by less than 4E-4 (8.31739528805997e-07) +OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896764767194441E-007) differ by less than 4E-4 (8.362311807452727e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,190 +309,44 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668127564727394E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.3691s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0900s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.2791s for 90112 events => throughput is 2.75E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.8141s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5466s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2675s for 90112 events => throughput is 3.97E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007) differ by less than 4E-4 (5.380134884269694e-07) +OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668127564727394E-007) differ by less than 4E-4 (5.740757704764121e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.797581e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.799365e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5958s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2624s for 8192 events => throughput is 3.12E+04 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (7.9896697955084444E-007) and cpp (7.9896764408326359E-007) differ by less than 4E-4 (8.31739528805997e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 4.9443s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0456s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8987s for 90112 events => throughput is 3.11E+04 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007) differ by less than 4E-4 (5.380134884269694e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.232867e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.133308e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.231460e+04 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.0285s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6828s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3457s for 8192 events => throughput is 2.37E+04 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896778056937195E-007) differ by less than 4E-4 (1.0025677505964836e-06) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.111803e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.9990s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1683s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8307s for 90112 events => throughput is 2.35E+04 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668139178203571E-007) differ by less than 4E-4 (7.255530953820255e-07) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.399591e+04 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.392653e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896805369365078E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896777191982386E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8136s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6713s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6454s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0259s for 8192 events => throughput is 3.17E+05 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** 
-OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896805369365078E-007) differ by less than 4E-4 (1.3444145174901223e-06) +OK! xsec from fortran (7.9896697955084370E-007) and hip (7.9896777191982386E-007) differ by less than 4E-4 (9.917418370974929e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668194616292154E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668141007936531E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.4309s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2769s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1540s for 90112 events => throughput is 5.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9564s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6723s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2841s for 90112 events => throughput is 3.17E+05 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668194616292154E-007) differ by less than 4E-4 (1.4486452351025747e-06) +OK! xsec from fortran (7.6668083551438261E-007) and hip (7.6668141007936531E-007) differ by less than 4E-4 (7.494187359569082e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.229528e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.255455e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.512458e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.207584e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.376887e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.176201e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.385341e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.009662e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.353397e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.174315e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.396782e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.977217e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.372639e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.171851e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.860961e+05 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.667254e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 66daeb0e97..739e2ff231 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_06:05:05 +DATE: 2024-05-16_21:06:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.7059s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3465s - [COUNTERS] Fortran MEs ( 1 ) : 2.3593s for 8192 events => throughput is 3.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6435s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2582s + [COUNTERS] Fortran MEs ( 1 ) : 1.3854s for 8192 events => throughput is 5.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.7035s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s - [COUNTERS] Fortran MEs ( 1 ) : 2.3597s for 8192 events => throughput is 3.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6438s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2579s + [COUNTERS] Fortran MEs ( 1 ) : 1.3859s for 8192 events => throughput is 5.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 27.5770s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8193s - [COUNTERS] Fortran MEs ( 1 ) : 25.7577s for 90112 events => throughput is 3.50E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.5842s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3403s + [COUNTERS] Fortran MEs ( 1 ) : 15.2439s for 90112 events => throughput is 5.91E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896696375074458E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.3005s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7718s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5286s for 8192 events => throughput is 3.24E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3380s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2885s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.0494s for 8192 events => throughput is 4.00E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696375074447E-007) differ by less than 2E-4 (1.9775660775600556e-08) +OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896696375074458E-007) differ by less than 2E-4 (1.9775659776399834e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668081976882384E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 31.9857s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2191s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.7666s for 90112 events => throughput is 3.25E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.9227s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3599s + [COUNTERS] CudaCpp MEs ( 2 ) : 22.5629s for 90112 events => throughput is 3.99E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081976882373E-007) differ by less than 2E-4 (2.0537305522871918e-08) +OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668081976882384E-007) differ by less than 2E-4 (2.0537305855938826e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.386546e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.082539e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.397002e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.089958e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896696285825699E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.8744s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5934s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2810s for 8192 events => throughput is 6.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.0479s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8960s for 8192 events => throughput is 9.14E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696285825688E-007) differ by less than 2E-4 (2.089271267102788e-08) +OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896696285825699E-007) differ by less than 2E-4 (2.0892711671827158e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668081890954439E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 17.1795s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0414s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.1381s for 90112 events => throughput is 6.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.0879s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2246s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.8633s for 90112 events => throughput is 9.14E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081890954375E-007) differ by less than 2E-4 (2.1658084770059816e-08) +OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668081890954439E-007) differ by less than 2E-4 (2.1658084325970606e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.678262e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.370714e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.743588e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.376189e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,8 +276,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,13 +285,13 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.4779s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9015s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5764s for 8192 events => throughput is 1.42E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0792s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6832s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3961s for 8192 events => throughput is 2.07E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) +OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.912112224111695e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,8 +309,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,181 +318,35 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.7035s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3540s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.3495s for 90112 events => throughput is 1.42E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.0941s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7388s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3553s for 90112 events => throughput is 2.07E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) +OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004716071297e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.454724e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.476512e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.3378s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8314s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5064s for 8192 events => throughput is 1.62E+04 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.8877s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3042s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.5835s for 90112 events => throughput is 1.61E+04 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.679497e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.123477e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.670221e+04 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.7201s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0244s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6957s for 8192 events => throughput is 1.18E+04 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.126374e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 10.1095s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4855s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.6240s for 90112 events => throughput is 1.18E+04 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.204025e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.203510e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -504,28 +358,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697918297697E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8365s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8192s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7763s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7263s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0500s for 8192 events => throughput is 1.64E+05 events/s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697918297644E-007) differ by less than 2E-4 (4.6042958334879813e-10) +OK! xsec from fortran (7.9896697955084370E-007) and hip (7.9896697918297697E-007) differ by less than 2E-4 (4.604279180142612e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -537,67 +391,65 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551547613E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.4662s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2768s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1894s for 90112 events => throughput is 4.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2813s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7304s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5509s for 90112 events => throughput is 1.64E+05 events/s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551547592E-007) differ by less than 2E-4 (1.4264145420384011e-12) +OK! 
xsec from fortran (7.6668083551438261E-007) and hip (7.6668083551547613E-007) differ by less than 2E-4 (1.426192497433476e-12) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.814879e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.662460e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.185918e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.656556e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.154361e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 4.210836e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.382253e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.749518e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.160102e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.207483e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.387193e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.216328e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.108981e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.201399e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.750323e+05 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.131995e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 059122dda6..8b8d5ea27e 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_06:00:05 +DATE: 2024-05-16_21:02:53 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4148s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4054s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4498s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4439s + [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3154s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3059s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2466s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2407s + [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.38E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 + [XSECTION] Cross section = 0.4762 [0.47620722822826017] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.3773s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2771s - [COUNTERS] Fortran MEs ( 1 ) : 0.1002s for 90112 events => throughput is 8.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0920s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0296s + [COUNTERS] Fortran MEs ( 1 ) : 0.0623s for 90112 events => throughput is 1.45E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,15 +124,15 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] ERROR! 
No cross section in log file: - /tmp/avalassi/output_susyggt1t1_x1_cudacpp + /tmp/valassia/output_susyggt1t1_x1_cudacpp ... xqcutij # 3> 0.0 0.0 RESET CUMULATIVE VARIABLE diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 01167da954..6b80cd42f0 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +make USEBUILDDIR=1 BACKEND=hip - - -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_06:00:09 +DATE: 2024-05-16_21:02:57 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4095s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4004s - [COUNTERS] Fortran MEs ( 1 ) : 0.0092s for 8192 events => throughput is 8.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3069s + [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.40E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3199s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3104s - [COUNTERS] Fortran MEs ( 1 ) : 0.0095s for 8192 events => throughput is 8.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2478s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2419s + [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 + [XSECTION] Cross section = 0.4762 [0.47620722822826017] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.3992s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2973s - [COUNTERS] Fortran MEs ( 1 ) : 0.1018s for 90112 events => throughput is 8.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0950s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0319s + [COUNTERS] Fortran MEs ( 1 ) : 0.0631s for 90112 events => throughput is 1.43E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,15 +124,15 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] ERROR! 
No cross section in log file: - /tmp/avalassi/output_susyggt1t1_x1_cudacpp + /tmp/valassia/output_susyggt1t1_x1_cudacpp ... xqcutij # 3> 0.0 0.0 RESET CUMULATIVE VARIABLE diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 6c876298cd..a73952ea72 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_06:00:14 +DATE: 2024-05-16_21:03:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4113s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4020s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3085s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3027s + [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3138s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3043s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2493s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2435s + [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 + [XSECTION] Cross section = 0.4762 [0.47620722822826017] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.3758s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2758s - [COUNTERS] Fortran MEs ( 1 ) : 0.1001s for 90112 events => throughput is 9.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0959s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0335s + [COUNTERS] Fortran MEs ( 1 ) : 0.0623s for 90112 events => throughput is 1.45E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,15 +124,15 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] ERROR! 
No cross section in log file: - /tmp/avalassi/output_susyggt1t1_x1_cudacpp + /tmp/valassia/output_susyggt1t1_x1_cudacpp ... xqcutij # 3> 0.0 0.0 RESET CUMULATIVE VARIABLE diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index fd24a61552..44d76f3e44 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:59:47 +DATE: 2024-05-16_21:02:38 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8237s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7798s - [COUNTERS] Fortran MEs ( 1 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6674s + [COUNTERS] Fortran MEs ( 1 ) : 0.0287s for 8192 events => throughput is 2.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4191s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s - [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2884s + [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 + [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.8130s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3278s - [COUNTERS] Fortran MEs ( 1 ) : 0.4852s for 90112 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3407s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0288s + [COUNTERS] Fortran MEs ( 1 ) : 0.3120s for 90112 events => throughput is 2.89E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,19 +124,19 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 171.8 [171.81273026311101] fbridge_mode=1 + [XSECTION] Cross section = 171.8 [171.81273026311092] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.7007s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0395s for 8192 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5425s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5103s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0322s for 8192 events => throughput is 2.54E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (44.598860065419856) and cpp (171.81273026311101) differ by more than 3E-14 (2.852401832941188) +ERROR! 
xsec from fortran (44.598860065419849) and cpp (171.81273026311092) differ by more than 3E-14 (2.8524018329411867) diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 293718b73f..792d5133aa 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:59:53 +DATE: 2024-05-16_21:02:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8342s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7896s - [COUNTERS] Fortran MEs ( 1 ) : 0.0446s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5882s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5595s + [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4178s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3733s - [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3184s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2899s + [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 + [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.8125s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3299s - [COUNTERS] Fortran MEs ( 1 ) : 0.4826s for 90112 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3414s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0272s + [COUNTERS] Fortran MEs ( 1 ) : 0.3142s for 90112 events => throughput is 2.87E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,19 +124,19 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 171.8 [171.81270286137041] fbridge_mode=1 + [XSECTION] Cross section = 171.8 [171.81269679287095] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.7025s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6657s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0367s for 8192 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5126s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4850s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0276s for 8192 events => throughput is 2.97E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (44.598860065419856) and cpp (171.81270286137041) differ by more than 4E-4 (2.8524012185366816) +ERROR! 
xsec from fortran (44.598860065419849) and cpp (171.81269679287095) differ by more than 4E-4 (2.8524010824681945) diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index f9ac9cdc3d..c8837e947f 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_05:59:59 +DATE: 2024-05-16_21:02:48 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8401s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7957s - [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5881s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5596s + [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4268s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3820s - [COUNTERS] Fortran MEs ( 1 ) : 0.0448s for 8192 events => throughput is 1.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3180s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2895s + [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 + [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.8476s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3594s - [COUNTERS] Fortran MEs ( 1 ) : 0.4881s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3405s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0289s + [COUNTERS] Fortran MEs ( 1 ) : 0.3116s for 90112 events => throughput is 2.89E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,19 +124,19 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 171.8 [171.81273490068889] fbridge_mode=1 + [XSECTION] Cross section = 171.8 [171.81273490068895] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.7032s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6624s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5331s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5001s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (44.598860065419856) and cpp (171.81273490068889) differ by more than 2E-4 (2.8524019369254128) +ERROR! xsec from fortran (44.598860065419849) and cpp (171.81273490068895) differ by more than 2E-4 (2.8524019369254145) From 32a5b40aaddbab9b9e42698427d01436ec7cf805 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 17 May 2024 08:46:56 +0200 Subject: [PATCH 46/48] [jtmk2] ** (ALMOST COMPLETE JTMK2) ** go back to itscrd90 logs again git checkout 5fec65cc9a8df54bbbb119d22af9fdbe1d33024e tput/logs_* tmad/logs_* This essentially completes my latest jtmk2 patches in the cudacpp code. This will only need a merge of jtmk into master before (assuming nothing else in needed in jtmk PR #798). 
--- .../log_eemumu_mad_d_inl0_hrd0.txt | 434 ++++++++++------ .../log_eemumu_mad_f_inl0_hrd0.txt | 442 ++++++++++------ .../log_eemumu_mad_m_inl0_hrd0.txt | 424 +++++++++++----- .../log_ggtt_mad_d_inl0_hrd0.txt | 434 ++++++++++------ .../log_ggtt_mad_f_inl0_hrd0.txt | 436 ++++++++++------ .../log_ggtt_mad_m_inl0_hrd0.txt | 426 ++++++++++------ .../log_ggttg_mad_d_inl0_hrd0.txt | 446 +++++++++++------ .../log_ggttg_mad_f_inl0_hrd0.txt | 448 +++++++++++------ .../log_ggttg_mad_m_inl0_hrd0.txt | 448 +++++++++++------ .../log_ggttgg_mad_d_inl0_hrd0.txt | 448 +++++++++++------ .../log_ggttgg_mad_f_inl0_hrd0.txt | 450 +++++++++++------ .../log_ggttgg_mad_m_inl0_hrd0.txt | 450 +++++++++++------ .../log_ggttggg_mad_d_inl0_hrd0.txt | 438 ++++++++++------ .../log_ggttggg_mad_f_inl0_hrd0.txt | 444 ++++++++++------ .../log_ggttggg_mad_m_inl0_hrd0.txt | 438 ++++++++++------ .../log_gqttq_mad_d_inl0_hrd0.txt | 470 ++++++++++++----- .../log_gqttq_mad_f_inl0_hrd0.txt | 472 +++++++++++++----- .../log_gqttq_mad_m_inl0_hrd0.txt | 471 ++++++++++++----- .../log_heftggbb_mad_d_inl0_hrd0.txt | 438 ++++++++++------ .../log_heftggbb_mad_f_inl0_hrd0.txt | 114 ++--- .../log_heftggbb_mad_m_inl0_hrd0.txt | 446 +++++++++++------ .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 442 ++++++++++------ .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 438 ++++++++++------ .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 434 ++++++++++------ .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 76 +-- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 76 +-- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 74 +-- .../log_susyggtt_mad_d_inl0_hrd0.txt | 86 ++-- .../log_susyggtt_mad_f_inl0_hrd0.txt | 86 ++-- .../log_susyggtt_mad_m_inl0_hrd0.txt | 88 ++-- .../log_eemumu_mad_d_inl0_hrd0.txt | 245 +++++---- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 253 ++++++---- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 231 +++++---- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 248 +++++---- .../log_eemumu_mad_d_inl0_hrd1.txt | 241 +++++---- 
.../log_eemumu_mad_d_inl1_hrd0.txt | 245 +++++---- .../log_eemumu_mad_d_inl1_hrd1.txt | 245 +++++---- .../log_eemumu_mad_f_inl0_hrd0.txt | 255 ++++++---- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 261 ++++++---- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 241 +++++---- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 256 ++++++---- .../log_eemumu_mad_f_inl0_hrd1.txt | 255 ++++++---- .../log_eemumu_mad_f_inl1_hrd0.txt | 255 ++++++---- .../log_eemumu_mad_f_inl1_hrd1.txt | 255 ++++++---- .../log_eemumu_mad_m_inl0_hrd0.txt | 241 +++++---- .../log_eemumu_mad_m_inl0_hrd1.txt | 241 +++++---- .../log_ggtt_mad_d_inl0_hrd0.txt | 245 +++++---- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 253 ++++++---- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 231 +++++---- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 248 +++++---- .../log_ggtt_mad_d_inl0_hrd1.txt | 241 +++++---- .../log_ggtt_mad_d_inl1_hrd0.txt | 245 +++++---- .../log_ggtt_mad_d_inl1_hrd1.txt | 241 +++++---- .../log_ggtt_mad_f_inl0_hrd0.txt | 259 ++++++---- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 267 ++++++---- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 251 ++++++---- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 262 ++++++---- .../log_ggtt_mad_f_inl0_hrd1.txt | 259 ++++++---- .../log_ggtt_mad_f_inl1_hrd0.txt | 259 ++++++---- .../log_ggtt_mad_f_inl1_hrd1.txt | 259 ++++++---- .../log_ggtt_mad_m_inl0_hrd0.txt | 245 +++++---- .../log_ggtt_mad_m_inl0_hrd1.txt | 245 +++++---- .../log_ggttg_mad_d_inl0_hrd0.txt | 270 +++++----- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 280 ++++++----- .../log_ggttg_mad_d_inl0_hrd1.txt | 270 +++++----- .../log_ggttg_mad_f_inl0_hrd0.txt | 284 ++++++----- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 294 ++++++----- .../log_ggttg_mad_f_inl0_hrd1.txt | 284 ++++++----- .../log_ggttg_mad_m_inl0_hrd0.txt | 266 +++++----- .../log_ggttg_mad_m_inl0_hrd1.txt | 266 +++++----- .../log_ggttgg_mad_d_inl0_hrd0.txt | 270 +++++----- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 280 ++++++----- 
.../log_ggttgg_mad_d_inl0_hrd0_common.txt | 254 ++++++---- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 275 +++++----- .../log_ggttgg_mad_d_inl0_hrd1.txt | 270 +++++----- .../log_ggttgg_mad_d_inl1_hrd0.txt | 278 ++++++----- .../log_ggttgg_mad_d_inl1_hrd1.txt | 274 +++++----- .../log_ggttgg_mad_f_inl0_hrd0.txt | 286 ++++++----- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 296 ++++++----- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 280 ++++++----- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 291 ++++++----- .../log_ggttgg_mad_f_inl0_hrd1.txt | 284 ++++++----- .../log_ggttgg_mad_f_inl1_hrd0.txt | 284 ++++++----- .../log_ggttgg_mad_f_inl1_hrd1.txt | 284 ++++++----- .../log_ggttgg_mad_m_inl0_hrd0.txt | 266 +++++----- .../log_ggttgg_mad_m_inl0_hrd1.txt | 266 +++++----- .../log_ggttggg_mad_d_inl0_hrd0.txt | 266 +++++----- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 276 +++++----- .../log_ggttggg_mad_d_inl0_hrd1.txt | 266 +++++----- .../log_ggttggg_mad_f_inl0_hrd0.txt | 282 ++++++----- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 292 ++++++----- .../log_ggttggg_mad_f_inl0_hrd1.txt | 282 ++++++----- .../log_ggttggg_mad_m_inl0_hrd0.txt | 266 +++++----- .../log_ggttggg_mad_m_inl0_hrd1.txt | 266 +++++----- .../log_gqttq_mad_d_inl0_hrd0.txt | 241 +++++++-- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 254 ++++++++-- .../log_gqttq_mad_d_inl0_hrd1.txt | 241 +++++++-- .../log_gqttq_mad_f_inl0_hrd0.txt | 241 +++++++-- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 254 ++++++++-- .../log_gqttq_mad_f_inl0_hrd1.txt | 241 +++++++-- .../log_gqttq_mad_m_inl0_hrd0.txt | 241 +++++++-- .../log_gqttq_mad_m_inl0_hrd1.txt | 241 +++++++-- .../log_heftggbb_mad_d_inl0_hrd0.txt | 241 +++++---- .../log_heftggbb_mad_d_inl0_hrd1.txt | 241 +++++---- .../log_heftggbb_mad_f_inl0_hrd0.txt | 261 ++++++---- .../log_heftggbb_mad_f_inl0_hrd1.txt | 261 ++++++---- .../log_heftggbb_mad_m_inl0_hrd0.txt | 241 +++++---- .../log_heftggbb_mad_m_inl0_hrd1.txt | 241 +++++---- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 270 
+++++----- .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 270 +++++----- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 286 ++++++----- .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 286 ++++++----- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 266 +++++----- .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 266 +++++----- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 241 +++++---- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 241 +++++---- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 251 ++++++---- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 251 ++++++---- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 245 +++++---- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 245 +++++---- .../log_susyggtt_mad_d_inl0_hrd0.txt | 249 +++++---- .../log_susyggtt_mad_d_inl0_hrd1.txt | 253 ++++++---- .../log_susyggtt_mad_f_inl0_hrd0.txt | 259 ++++++---- .../log_susyggtt_mad_f_inl0_hrd1.txt | 259 ++++++---- .../log_susyggtt_mad_m_inl0_hrd0.txt | 245 +++++---- .../log_susyggtt_mad_m_inl0_hrd1.txt | 245 +++++---- 126 files changed, 22204 insertions(+), 13507 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 1fe71e4d65..41d66d8253 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_20:43:11 +DATE: 2024-05-16_01:25:56 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.5477s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5417s - [COUNTERS] Fortran MEs ( 1 ) : 0.0060s for 8192 events => throughput is 1.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7231s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7147s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1287s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1771s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.54E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2774s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2147s - [COUNTERS] Fortran MEs ( 1 ) : 0.0627s for 90112 events => throughput is 1.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3949s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3024s + [COUNTERS] Fortran MEs ( 1 ) : 0.0925s for 90112 events => throughput is 9.74E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1522s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.36E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1913s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1844s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 8192 events => throughput is 1.18E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661545E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000780E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2876s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2218s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0658s for 90112 events => throughput is 1.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3835s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0754s for 90112 events => throughput is 1.20E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000780E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.404212e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.192081e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.361935e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.204613e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1464s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1425s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1864s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1822s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.95E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715404661518E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,8 +233,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2586s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2197s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0390s for 90112 events => throughput is 2.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3602s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3125s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0476s for 90112 events => throughput is 1.89E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +255,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.394617e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.953763e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.428149e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.009226e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,8 +276,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,13 +285,13 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1339s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1314s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1802s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2444s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2169s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0275s for 90112 events => throughput is 3.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3431s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0366s for 90112 events => throughput is 2.46E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.417944e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.541984e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.533378e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.616899e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.1841s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1810s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.69E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3388s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3042s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0346s for 90112 events => throughput is 2.61E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.662866e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.883371e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +428,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.4581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.75E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.1859s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1818s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.99E+06 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and hip (9.3382715404661545E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +461,143 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3474s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3044s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0430s for 90112 events => throughput is 2.10E+06 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.029340e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.231218e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.6140s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6135s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715404661532E-002) differ by less than 3E-14 (0.0) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.5024s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4979s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 90112 events => throughput is 2.00E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7363s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7314s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.86E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (9.1515602020000766E-002) and hip (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155916e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.277665e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.548424e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.916168e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.205050e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.959957e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.861671e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.493136e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.220481e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.970202e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.950417e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.040191e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.191300e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.002261e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.568594e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.140061e+08 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 0ed9fa9829..c4c8099bbf 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y 
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_20:43:22 +DATE: 2024-05-16_01:26:13 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.5129s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5070s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7287s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7200s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.42E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1364s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1305s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.57E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2788s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2163s - [COUNTERS] Fortran MEs ( 1 ) : 0.0625s for 90112 events => throughput is 1.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3902s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2987s + [COUNTERS] Fortran MEs ( 1 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382701684199335E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1416s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1364s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 8192 events => throughput is 1.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1903s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1836s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.22E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382701684199335E-002) differ by less than 4E-4 (1.4692721372888684e-07) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382703205998396E-002) differ by less than 4E-4 (1.306308462512007e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515588842633111E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0571s for 90112 events => throughput is 1.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3825s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3093s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0732s for 90112 events => throughput is 1.23E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515588842633111E-002) differ by less than 4E-4 (1.439903947186849e-07) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515590123565249E-002) differ by less than 4E-4 (1.2999352305698153e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.646018e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.260929e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.653973e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.250210e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382719831741665E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1319s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1298s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1813s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.15E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382719831741665E-002) differ by less than 4E-4 (4.740791825774693e-08) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700723828302E-002) differ by less than 4E-4 (1.5721146218172777e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515606481761602E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2386s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2154s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0232s for 90112 events => throughput is 3.88E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3288s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0290s for 90112 events => throughput is 3.10E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515606481761602E-002) differ by less than 4E-4 (4.875410031246474e-08) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587612890761E-002) differ by less than 4E-4 (1.5742791048545257e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.034071e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.206836e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.150765e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.334282e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382719700521907E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1324s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1307s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.60E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1835s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1811s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.38E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382719700521907E-002) differ by less than 4E-4 (4.6002735842876064e-08) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700679354239E-002) differ by less than 4E-4 (1.576877179942926e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515606480805645E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2352s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2156s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 90112 events => throughput is 4.60E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3304s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3034s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 90112 events => throughput is 3.34E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515606480805645E-002) differ by less than 4E-4 (4.874365444607065e-08) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002) differ by less than 4E-4 (1.573566908996682e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.888044e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.522447e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.386931e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.1829s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.41E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700679354239E-002) differ by less than 4E-4 (1.576877179942926e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3301s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3033s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 90112 events => throughput is 3.37E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002) differ by less than 4E-4 (1.573566908996682e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.528072e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.038351e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.720927e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.1844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.21E+06 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382704335459282E-002) differ by less than 4E-4 (1.1853587900123586e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! 
Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3371s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0291s for 90112 events => throughput is 3.10E+06 events/s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515591296252558E-002) differ by less than 4E-4 (1.1717945325173673e-07) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.341186e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.598530e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382704338101225E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.4120s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4117s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0003s for 8192 events => throughput is 2.90E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6090s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events 
=> throughput is 1.68E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and hip (9.3382704338101225E-002) differ by less than 4E-4 (1.1850758729892164e-07) +OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382706077425631E-002) differ by less than 4E-4 (9.988182347875352e-08) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515591361999701E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.4986s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4956s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 90112 events => throughput is 3.00E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7344s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7297s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.95E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and hip (9.1515591361999701E-002) differ by less than 4E-4 (1.1646102771045719e-07) +OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515592892887687E-002) differ by less than 4E-4 (9.973286385633884e-08) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.728619e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.546893e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.078851e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.804903e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.339646e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.477327e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.590578e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.060127e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.334954e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.389797e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.676438e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.251129e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.104281e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.752691e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.666333e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.481445e+08 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 8961dc7493..fc86f120db 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-05-16_20:43:34 +DATE: 2024-05-16_01:26:29 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.5058s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5000s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7237s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7153s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661518E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1355s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1297s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1861s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1773s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.40E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2145s - [COUNTERS] Fortran MEs ( 1 ) : 0.0625s for 90112 events => throughput is 1.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4045s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3124s + [COUNTERS] Fortran MEs ( 1 ) : 0.0920s for 90112 events => throughput is 9.79E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1415s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1356s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1967s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1895s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.717646025412023e-10) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701395E-002) differ by less than 2E-4 (1.7176482458580722e-10) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,8 +157,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2197s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0658s for 90112 events => throughput is 1.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3901s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3110s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0791s for 90112 events => throughput is 1.14E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +179,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.407330e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.191141e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.423508e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.200935e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,8 +200,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -209,13 +209,13 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1363s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1328s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.98E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.717646025412023e-10) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.7176438049659737e-10) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,8 +233,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2558s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2174s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0384s for 90112 events => throughput is 2.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3030s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0456s for 90112 events => throughput is 1.98E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +255,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.467655e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.010123e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.503347e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.071657e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,8 +276,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,13 +285,13 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1334s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1309s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1848s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.49E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484913930753692e-10) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,8 +309,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.2519s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2224s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0295s for 90112 events => throughput is 3.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3402s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3039s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0363s for 90112 events => throughput is 2.49E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -331,22 +331,92 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.346047e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.462700e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.422737e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.639506e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.1850s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.61E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3431s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0351s for 90112 events => throughput is 2.56E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.637002e+06 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.800572e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +428,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715392009222E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.4116s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4111s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.77E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.1865s + [COUNTERS] Fortran Overhead ( 0 ) : 
0.1828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 8192 events => throughput is 2.21E+06 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661518E-002) and hip (9.3382715392009222E-002) differ by less than 2E-4 (1.3548862032308762e-10) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +461,143 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1782 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 0.3487s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3069s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 90112 events => throughput is 2.16E+06 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.197743e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.278261e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1591 events (found 1595 events) + [COUNTERS] PROGRAM TOTAL : 0.6095s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715392009194E-002) differ by less than 2E-4 (1.3548906441229747e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! 
Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.4970s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4925s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 90112 events => throughput is 2.02E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7369s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7319s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.81E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and hip (9.1515602021089631E-002) differ by less than 2E-4 (1.1898038110302878e-11) +OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602021089631E-002) differ by less than 2E-4 (1.1898038110302878e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.150838e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.356139e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.551007e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.953546e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.214814e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.960740e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.892921e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.522141e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.264688e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.009432e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.974527e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.090602e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.187799e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.972046e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.568186e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.157381e+08 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index f8550097b2..e1be7813b6 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 
BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_20:43:46 +DATE: 2024-05-16_01:26:45 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.6603s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6317s - [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8221s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7787s + [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3142s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2856s - [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4146s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s + [COUNTERS] Fortran MEs ( 1 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/val [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3363s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0237s - [COUNTERS] Fortran MEs ( 1 ) : 0.3126s for 90112 events => throughput is 2.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7473s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2714s + [COUNTERS] Fortran MEs ( 1 ) : 0.4758s for 90112 events => throughput is 1.89E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3667s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3346s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0396s for 8192 events => throughput is 2.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094184803756640) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4349s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0752s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3598s for 90112 events => throughput is 2.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8025s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3670s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4355s for 90112 events => throughput is 2.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989099) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.622701e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.132783e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.626013e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.139840e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756619] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3247s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3067s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0179s for 8192 events => throughput is 4.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4032s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.36E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094184803756619) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756640) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989085] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.2437s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0464s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1973s for 90112 events => throughput is 4.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6069s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3541s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2528s for 90112 events => throughput is 3.56E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989085) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989106) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.622662e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.613591e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.651962e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.632391e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3091s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2988s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 8192 events => throughput is 7.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3994s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3855s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.89E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094184803756640) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.1495s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0366s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1128s for 90112 events => throughput is 7.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5056s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3497s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1559s for 90112 events => throughput is 5.78E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989114) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.257087e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.889737e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.334302e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.921310e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.3995s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.55E+05 events/s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.4813s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3413s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1399s for 90112 events => throughput is 6.44E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.578026e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.673606e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.4212s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3991s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.5933s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3549s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 90112 events => throughput is 3.78E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.816986e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.814285e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,8 +504,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -367,19 +513,19 @@ Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_c [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.5849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5842s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.16E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8124s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8118s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.42E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and hip (47.094184803756640) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.094184803756640) and cuda (47.094184803756640) differ by less than 3E-14 (0.0) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,8 +537,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -400,56 +546,58 @@ Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3252s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3174s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.15E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7642s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7574s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.34E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and hip (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (47.105695279989114) and cuda (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.296184e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.120396e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.023795e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.622859e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.754752e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.177398e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.750038e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.080565e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.773188e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.172657e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.952920e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.155839e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.747788e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.173872e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.148755e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.068966e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index a5639eedd6..0b367d2d96 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_20:44:04 +DATE: 2024-05-16_01:27:12 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.5861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5576s - [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8191s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7751s + [COUNTERS] Fortran MEs ( 1 ) : 0.0439s for 8192 events => throughput is 1.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3178s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2892s - [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4113s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s + [COUNTERS] Fortran MEs ( 1 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/val [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3422s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0298s - [COUNTERS] Fortran MEs ( 1 ) : 0.3124s for 90112 events => throughput is 2.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7478s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2691s + [COUNTERS] Fortran MEs ( 1 ) : 0.4787s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094178241446492] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094179780921394] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3434s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3159s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0275s for 8192 events => throughput is 2.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4476s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4108s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0368s for 8192 events => throughput is 2.23E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094178241446492) differ by less than 4E-4 (1.3934438314322506e-07) +OK! xsec from fortran (47.094184803756640) and cpp (47.094179780921394) differ by less than 4E-4 (1.0665510541407741e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105686930681671] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105688579298537] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3644s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0602s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3042s for 90112 events => throughput is 2.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7717s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3659s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4058s for 90112 events => throughput is 2.22E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105686930681671) differ by less than 4E-4 (1.7724624157278157e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105688579298537) differ by less than 4E-4 (1.4224799227413598e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.093104e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.257844e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.093112e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.292052e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094176373190514] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094175850060040] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3157s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3029s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 8192 events => throughput is 6.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4035s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3878s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.23E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094176373190514) differ by less than 4E-4 (1.7901501314643298e-07) +OK! xsec from fortran (47.094184803756640) and cpp (47.094175850060040) differ by less than 4E-4 (1.9012318908107062e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105685173093654] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105684763984058] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.1839s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0431s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1408s for 90112 events => throughput is 6.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5166s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3428s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1737s for 90112 events => throughput is 5.19E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105685173093654) differ by less than 4E-4 (2.1455782361901043e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105684763984058) differ by less than 4E-4 (2.2324275217311396e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.532441e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.218996e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.807352e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.263655e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094174474272364] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3033s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2968s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3915s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3833s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094174474272364) differ by less than 4E-4 (2.1933672500473733e-07) +OK! xsec from fortran (47.094184803756640) and cpp (47.094173652938650) differ by less than 4E-4 (2.3677696170398832e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105684585116684] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.1035s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0342s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0693s for 90112 events => throughput is 1.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.4300s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3376s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0925s for 90112 events => throughput is 9.74E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105684585116684) differ by less than 4E-4 (2.2703990176786704e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ by less than 4E-4 (2.384278946498952e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.370067e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.896073e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.940492e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.3898s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.094184803756640) and cpp (47.094173652938650) differ by less than 4E-4 (2.3677696170398832e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.4168s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3312s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0856s for 90112 events => throughput is 1.05E+06 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ by less than 4E-4 (2.384278946498952e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.032975e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.383213e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.050779e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! 
Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094178213275804] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.3948s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3837s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 8192 events => throughput is 7.41E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.094184803756640) and cpp (47.094178213275804) differ by less than 4E-4 (1.3994256109484127e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! 
Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105688407939567] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.4646s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3397s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1249s for 90112 events => throughput is 7.22E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cpp (47.105688407939567) differ by less than 4E-4 (1.4588574703822133e-07) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.304914e+05 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.408593e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094176770070867] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184344050284] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.5742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5738s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0004s for 8192 events => throughput is 2.09E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8097s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8091s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => 
throughput is 1.50E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and hip (47.094176770070867) differ by less than 4E-4 (1.705876382374072e-07) +OK! xsec from fortran (47.094184803756640) and cuda (47.094184344050284) differ by less than 4E-4 (9.761425112664313e-09) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105687115703695] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105694586476879] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3245s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3206s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 90112 events => throughput is 2.32E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7654s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7594s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 90112 events => throughput is 1.51E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and hip (47.105687115703695) differ by less than 4E-4 (1.733184357144424e-07) +OK! xsec from fortran (47.105695279989114) and cuda (47.105694586476879) differ by less than 4E-4 (1.4722471020078842e-08) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.923699e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.397168e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.089089e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.912682e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.079244e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.099083e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.979394e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.785250e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082611e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.065057e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.073119e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.885291e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.172672e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.649544e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.364268e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.436840e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index fd0e858a7b..197f6200da 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 
BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_20:44:22 +DATE: 2024-05-16_01:27:37 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.7637s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7352s - [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8243s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7810s + [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=0 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3188s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2901s - [COUNTERS] Fortran MEs ( 1 ) : 0.0288s for 8192 events => throughput is 2.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4109s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3676s + [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/val [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3410s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0285s - [COUNTERS] Fortran MEs ( 1 ) : 0.3125s for 90112 events => throughput is 2.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7536s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2749s + [COUNTERS] Fortran MEs ( 1 ) : 0.4787s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,8 +124,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -133,13 +133,13 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3575s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3243s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0332s for 8192 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4520s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4121s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428942997143e-08) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,8 +157,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4261s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0618s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3643s for 90112 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8139s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3740s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4399s for 90112 events => throughput is 2.05E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -179,14 +179,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006634) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.234666e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.079375e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.455754e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.079476e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186141863908] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3252s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0178s for 8192 events => throughput is 4.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4154s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3930s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.67E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094186141863908) differ by less than 2E-4 (2.8413429165041748e-08) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,8 +233,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.2519s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0490s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2029s for 90112 events => throughput is 4.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5989s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3477s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2513s for 90112 events => throughput is 3.59E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -255,14 +255,14 @@ OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006626) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.661337e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.535235e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.698309e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.688874e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186193208834] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3099s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2998s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 8192 events => throughput is 8.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4007s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and cpp (47.094186193208834) differ by less than 2E-4 (2.9503689491505725e-08) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696667630852] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.1506s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0399s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1108s for 90112 events => throughput is 8.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5000s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3471s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1530s for 90112 events => throughput is 5.89E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105696667630852) differ by less than 2E-4 (2.9458046002517335e-08) +OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.449938e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.781843e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.502608e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.986506e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.4023s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3898s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.55E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.4677s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3316s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1361s for 90112 events => throughput is 6.62E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.671424e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.782154e+05 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +428,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184798437837] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.5744s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5737s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.10E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.4150s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3944s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756626) and hip (47.094184798437837) differ by less than 2E-4 (1.1293943558143837e-10) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,8 +461,84 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 + [UNWEIGHT] Wrote 1744 events (found 1749 events) + [COUNTERS] PROGRAM TOTAL : 1.5820s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2292s for 90112 events => throughput is 3.93E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.998616e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.990048e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.8154s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8148s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (47.094184803756640) and cuda (47.094184798437830) differ by less than 2E-4 (1.1293987967064822e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -400,56 +546,58 @@ Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3718s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3640s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.16E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7703s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7635s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 90112 events => throughput is 1.32E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (47.105695279989114) and hip (47.105695279068492) differ by less than 2E-4 (1.954369999168648e-11) +OK! xsec from fortran (47.105695279989114) and cuda (47.105695279068492) differ by less than 2E-4 (1.954369999168648e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.270245e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.143723e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.032927e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.636090e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.810154e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.182886e+07 ) sec^-1 *** 
EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.798767e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.066867e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.802290e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.180722e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.004195e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.146460e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.771337e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.158610e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.171072e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.030823e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index a6dfae25cd..7f0ff41464 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_20:44:40 +DATE: 2024-05-16_01:28:04 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.5850s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3824s - [COUNTERS] Fortran MEs ( 1 ) : 0.2026s for 8192 events => throughput is 4.04E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7020s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3669s + [COUNTERS] Fortran MEs ( 1 ) : 0.3350s for 8192 events => throughput is 2.45E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4570s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2542s - [COUNTERS] Fortran MEs ( 1 ) : 0.2027s for 8192 events => throughput is 4.04E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6627s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3273s + [COUNTERS] Fortran MEs ( 1 ) : 0.3354s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 3.4255s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2010s - [COUNTERS] Fortran MEs ( 1 ) : 2.2245s for 90112 events => throughput is 4.05E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.2831s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5823s + [COUNTERS] Fortran MEs ( 1 ) : 3.7008s for 90112 events => throughput is 2.43E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354763] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.8439s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5616s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2822s for 8192 events => throughput is 2.90E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6679s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3448s for 8192 events => throughput is 2.38E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317668354763) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252514E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 4.5951s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4889s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.1062s for 90112 events => throughput is 2.90E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.7090s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9162s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7928s for 90112 events => throughput is 2.38E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239236471252514E-002) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.979402e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.444412e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.982821e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.433186e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354515] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5372s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3981s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1391s for 8192 events => throughput is 5.89E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1796s for 8192 events => throughput is 4.56E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317668354515) differ by less than 3E-14 (2.475797344914099e-14) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607748863) differ by less than 3E-14 (2.453592884421596e-14) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252514E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 2.8700s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3398s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.5302s for 90112 events => throughput is 5.89E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 3.7766s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7859s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9906s for 90112 events => throughput is 4.53E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239236471252514E-002) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.074039e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.630294e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.093000e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.629345e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354763] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3911s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3256s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0655s for 8192 events => throughput is 1.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5178s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4262s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 8192 events => throughput is 8.94E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317668354763) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 2.0032s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7234s for 90112 events => throughput is 1.25E+05 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.7021s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6979s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0042s for 90112 events => throughput is 8.97E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239236471252555E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.257103e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.244917e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.258880e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.251295e+04 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.4953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0824s for 8192 events => throughput is 9.95E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.5931s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6918s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9014s for 90112 events => throughput is 1.00E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.033892e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.035720e+05 ) sec^-1 -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.5700s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4516s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1184s for 8192 events => throughput is 6.92E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 3.0137s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7172s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2965s for 90112 events => throughput is 6.95E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.144179e+04 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.156532e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354760] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5780s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7672s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and hip (0.10112317668354760) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748607749111) differ by less than 3E-14 (0.0) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 1.6103s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5267s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0836s for 90112 events => throughput is 1.08E+06 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.0397s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0164s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.87E+06 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and hip (7.9239236471252555E-002) differ by less than 3E-14 (0.0) +OK! 
xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481932717736E-002) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.120771e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.642318e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.148193e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.930638e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.680474e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.882259e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.302244e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.244433e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.676513e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.893041e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.840744e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.255841e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.666133e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.907568e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.440619e+05 ) sec^-1 
+Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.774192e+06 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 724ff0291a..1a8c36aa43 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_20:45:11 +DATE: 2024-05-16_01:28:48 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.4828s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2806s - [COUNTERS] Fortran MEs ( 1 ) : 0.2022s for 8192 events => throughput is 4.05E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7057s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3678s + [COUNTERS] Fortran MEs ( 1 ) : 0.3379s for 8192 events => throughput is 2.42E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4601s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2576s - [COUNTERS] Fortran MEs ( 1 ) : 0.2025s for 8192 events => throughput is 4.04E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6650s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3295s + [COUNTERS] Fortran MEs ( 1 ) : 0.3355s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 3.4225s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1981s - [COUNTERS] Fortran MEs ( 1 ) : 2.2244s for 90112 events => throughput is 4.05E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.3113s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5981s + [COUNTERS] Fortran MEs ( 1 ) : 3.7132s for 90112 events => throughput is 2.43E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112291597608296] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112722621426752] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7804s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5209s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2595s for 8192 events => throughput is 3.16E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9881s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6537s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3344s for 8192 events => throughput is 2.45E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112291597608296) differ by less than 4E-4 (2.5781178285555484e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112722621426752) differ by less than 4E-4 (2.569659680817793e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239221732791437E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 4.3172s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4654s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8518s for 90112 events => throughput is 3.16E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238468310179624E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.5951s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9082s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6869s for 90112 events => throughput is 2.44E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239221732791437E-002) differ by less than 4E-4 (1.8599953477416165e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238468310179624E-002) differ by less than 4E-4 (1.719182115555995e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.253346e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.531268e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.249434e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.536337e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112290421591680] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112720710186394] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4211s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0790s for 8192 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5295s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4297s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0998s for 8192 events => throughput is 8.21E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112290421591680) differ by less than 4E-4 (2.6944132867079418e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112720710186394) differ by less than 4E-4 (2.758652844936371e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239212368085274E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 2.1138s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2809s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8329s for 90112 events => throughput is 1.08E+05 events/s + [XSECTION] Cross section = 0.07924 [7.9238454786658835E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.7829s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6780s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1050s for 90112 events => throughput is 8.16E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239212368085274E-002) differ by less than 4E-4 (3.0418222529693395e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238454786658835E-002) differ by less than 4E-4 (3.4258681169685445e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.092129e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.418674e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.100966e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.378970e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112291415112837] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3350s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3009s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0341s for 8192 events => throughput is 2.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4226s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3763s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 8192 events => throughput is 1.77E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112291415112837) differ by less than 4E-4 (2.5961646764605106e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721766950902) differ by less than 4E-4 (2.654154597325764e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239211617250407E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 1.6114s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2370s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3743s for 90112 events => throughput is 2.41E+05 events/s + [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.1354s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6236s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5118s for 90112 events => throughput is 1.76E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239211617250407E-002) differ by less than 4E-4 (3.136577692020026e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002) differ by less than 4E-4 (3.5585866953180556e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.453476e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.814902e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.818224e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.4122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721766950902) differ by less than 4E-4 (2.654154597325764e-06) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.0774s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6180s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4593s for 90112 events => throughput is 1.96E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002) differ by less than 4E-4 (3.5585866953180556e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.016267e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.457462e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.024583e+05 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112723387847480] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.4393s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3831s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0562s for 8192 events => throughput is 1.46E+05 events/s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112723387847480) differ by less than 4E-4 (2.4938721023826105e-06) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238464410949921E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.2433s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6283s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6150s for 90112 events => throughput is 1.47E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238464410949921E-002) differ by less than 4E-4 (2.211270000440635e-07) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.471448e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.496104e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112292787307366] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112726034625694] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6376s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6355s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7661s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7652s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.57E+06 events/s -*** (3-hip) Compare 
MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and hip (0.10112292787307366) differ by less than 4E-4 (2.4604693221741414e-06) +OK! xsec from fortran (0.10112748607749111) and cuda (0.10112726034625694) differ by less than 4E-4 (2.2321452152196386e-06) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239222545537072E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 1.5598s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5376s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 90112 events => throughput is 4.06E+06 events/s + [XSECTION] Cross section = 0.07924 [7.9238473828077680E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.0217s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0116s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 90112 events => throughput is 8.94E+06 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and hip (7.9239222545537072E-002) differ by less than 4E-4 (1.7574267630049434e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238473828077680E-002) differ by less than 4E-4 (1.0228161673175862e-07) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.596097e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.279804e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.521296e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.849139e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.472410e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.708780e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.085314e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.376255e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.469858e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.741880e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.637900e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.526731e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.425874e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.576787e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.261945e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.628936e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index eb563a53e4..06cc385635 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-05-16_20:45:39 +DATE: 2024-05-16_01:29:27 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.4826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2802s - [COUNTERS] Fortran MEs ( 1 ) : 0.2024s for 8192 events => throughput is 4.05E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7042s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3668s + [COUNTERS] Fortran MEs ( 1 ) : 0.3374s for 8192 events => throughput is 2.43E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317668354764] fbridge_mode=0 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4610s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2587s - [COUNTERS] Fortran MEs ( 1 ) : 0.2023s for 8192 events => throughput is 4.05E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6678s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3296s + [COUNTERS] Fortran MEs ( 1 ) : 0.3383s for 8192 events => throughput is 2.42E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236471252555E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 3.4431s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2093s - [COUNTERS] Fortran MEs ( 1 ) : 2.2338s for 90112 events => throughput is 4.03E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.3035s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5927s + [COUNTERS] Fortran MEs ( 1 ) : 3.7109s for 90112 events => throughput is 2.43E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317761225882] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.8361s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5448s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2912s for 8192 events => throughput is 2.81E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3509s for 8192 events => throughput is 2.33E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317761225882) differ by less than 2E-4 (9.183959592817814e-09) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748700702684) differ by less than 2E-4 (9.191721828116783e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239237217958461E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 4.7112s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5198s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.1914s for 90112 events => throughput is 2.82E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 5.7816s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9205s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.8611s for 90112 events => throughput is 2.33E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239237217958461E-002) differ by less than 2E-4 (9.4234364755863e-09) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482679400354E-002) differ by less than 2E-4 (9.423232416594374e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.898005e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.398467e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.864631e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.396772e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317763556192] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5299s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3925s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1374s for 8192 events => throughput is 5.96E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6812s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5038s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1774s for 8192 events => throughput is 4.62E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317763556192) differ by less than 2E-4 (9.41440236879032e-09) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748702805033) differ by less than 2E-4 (9.399612865834683e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239237221421968E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 2.8737s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3594s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.5143s for 90112 events => throughput is 5.95E+04 events/s + [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 3.6988s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7524s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9464s for 90112 events => throughput is 4.63E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239237221421968E-002) differ by less than 2E-4 (9.467145956065792e-09) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482683055667E-002) differ by less than 2E-4 (9.469362849401364e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.146873e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.777911e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.162589e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.770421e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317741957558] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3914s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3254s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0660s for 8192 events => throughput is 1.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5041s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4149s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0892s for 8192 events => throughput is 9.19E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and cpp (0.10112317741957558) differ by less than 2E-4 (7.278528668663853e-09) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239237072275287E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 2.0008s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2746s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7262s for 90112 events => throughput is 1.24E+05 events/s + [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.6526s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6680s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9846s for 90112 events => throughput is 9.15E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and cpp (7.9239237072275287E-002) differ by less than 2E-4 (7.584913142011374e-09) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.274026e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.374488e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.304457e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.4867s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0788s for 8192 events => throughput is 1.04E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.5251s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6551s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8699s for 90112 events => throughput is 1.04E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.072957e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.274917e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.074127e+05 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1 + [UNWEIGHT] Wrote 386 events (found 1179 events) + [COUNTERS] PROGRAM TOTAL : 0.5672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1197s for 8192 events => throughput is 6.85E+04 events/s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748700265108) differ by less than 2E-4 (9.148451995955043e-09) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 3.0147s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7041s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3105s for 90112 events => throughput is 6.88E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482666076374E-002) differ by less than 2E-4 (9.255082034087536e-09) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.810756e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.935663e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112317662375726] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5947s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5871s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7757s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7703s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s -*** (3-hip) Compare 
MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112317668354764) and hip (0.10112317662375726) differ by less than 2E-4 (5.9126292750733e-10) +OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748601943165) differ by less than 2E-4 (5.74121417074025e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9239236476482192E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1899 events (found 1904 events) - [COUNTERS] PROGRAM TOTAL : 1.5831s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4995s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0837s for 90112 events => throughput is 1.08E+06 events/s + [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1898 events (found 1903 events) + [COUNTERS] PROGRAM TOTAL : 2.0407s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0176s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 90112 events => throughput is 3.91E+06 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9239236471252555E-002) and hip (7.9239236476482192E-002) differ by less than 2E-4 (6.599809587726213e-11) +OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481937154381E-002) differ by less than 2E-4 (5.5991211667105745e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.119462e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.631069e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.147711e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.120692e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.673894e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.856212e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.302029e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.234939e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.681043e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.866138e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.840526e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.243613e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.662870e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.862499e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.434320e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.731505e+06 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 99da4b0a3f..744dd47e66 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_20:46:11 +DATE: 2024-05-16_01:30:11 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 2.8627s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3581s - [COUNTERS] Fortran MEs ( 1 ) : 2.5046s for 8192 events => throughput is 3.27E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8074s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s + [COUNTERS] Fortran MEs ( 1 ) : 4.4427s for 8192 events => throughput is 1.84E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7716s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2816s - [COUNTERS] Fortran MEs ( 1 ) : 2.4900s for 8192 events => throughput is 3.29E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3603s + [COUNTERS] Fortran MEs ( 1 ) : 4.4473s for 8192 events => throughput is 1.84E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914656E-004] fbridge_mode=0 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 29.0583s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5955s - [COUNTERS] Fortran MEs ( 1 ) : 27.4629s for 90112 events => throughput is 3.28E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 50.3676s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1555s + [COUNTERS] Fortran MEs ( 1 ) : 48.2121s for 90112 events => throughput is 1.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579739E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612510102372E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 7.8333s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0232s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8101s for 8192 events => throughput is 2.15E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.2196s + [COUNTERS] Fortran Overhead ( 0 ) : 4.7253s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4943s for 8192 events => throughput is 1.82E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143122579739E-004) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102372E-004) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914648E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 47.4261s - [COUNTERS] Fortran Overhead ( 0 ) : 5.3494s - [COUNTERS] CudaCpp MEs ( 2 ) : 42.0767s for 90112 events => throughput is 2.14E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451704E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 56.2683s + [COUNTERS] Fortran Overhead ( 0 ) : 6.5201s + [COUNTERS] CudaCpp MEs ( 2 ) : 49.7482s for 90112 events => throughput is 1.81E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532411914648E-004) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451704E-004) differ by less than 3E-14 (5.551115123125783e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.170234e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.868635e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.210789e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.874481e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579739E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 3.5775s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9100s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.6674s for 8192 events => throughput is 4.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0549s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6552s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3997s for 8192 events => throughput is 3.41E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143122579739E-004) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914648E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 21.5173s - [COUNTERS] Fortran Overhead ( 0 ) : 3.2215s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.2958s for 90112 events => throughput is 4.93E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451701E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 30.7729s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4162s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3568s for 90112 events => throughput is 3.42E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532411914648E-004) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451701E-004) differ by less than 3E-14 (6.661338147750939e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.067248e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.598914e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.067497e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.606768e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579728E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.6695s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9666s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7029s for 8192 events => throughput is 1.17E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4142s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3728s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0413s for 8192 events => throughput is 7.87E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143122579728E-004) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914648E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 10.0128s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2748s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.7380s for 90112 events => throughput is 1.16E+04 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 14.6025s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1268s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.4757s for 90112 events => throughput is 7.85E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532411914648E-004) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.193787e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.082204e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.087591e+03 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 2.1594s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9156s for 8192 events => throughput is 8.95E+03 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 13.1097s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0095s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.1002s for 90112 events => throughput is 8.92E+03 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.157056e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.204524e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.190937e+03 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 2.7205s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5345s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1860s for 8192 events => throughput is 6.91E+03 events/s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 16.4021s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3045s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.0976s for 90112 events => throughput is 6.88E+03 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.943395e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.968493e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579723E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9424s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8292s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1132s for 8192 events => throughput is 7.24E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9037s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8707s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s -*** 
(3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and hip (3.8704143122579723E-004) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914653E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 3.3277s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0786s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2491s for 90112 events => throughput is 7.21E+04 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 2.9957s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6318s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3638s for 90112 events => throughput is 2.48E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and hip (1.5793532411914653E-004) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793438642451712E-004) differ by less than 3E-14 (0.0) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.211368e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.275863e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.451928e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.513394e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.244902e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.126700e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.041753e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.163753e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.233879e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.128674e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.227863e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.183392e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.245185e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.130320e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.390943e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.455396e+05 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 0b8c0a4e92..97726609cd 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_20:49:52 +DATE: 2024-05-16_01:34:39 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 2.7635s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2735s - [COUNTERS] Fortran MEs ( 1 ) : 2.4899s for 8192 events => throughput is 3.29E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7506s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3591s + [COUNTERS] Fortran MEs ( 1 ) : 4.3916s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7627s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2744s - [COUNTERS] Fortran MEs ( 1 ) : 2.4883s for 8192 events => throughput is 3.29E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3562s + [COUNTERS] Fortran MEs ( 1 ) : 4.3765s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914656E-004] fbridge_mode=0 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 28.9764s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6009s - [COUNTERS] Fortran MEs ( 1 ) : 27.3755s for 90112 events => throughput is 3.29E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 50.4568s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1771s + [COUNTERS] Fortran MEs ( 1 ) : 48.2797s for 90112 events => throughput is 1.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704259755238570E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703729438336302E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 6.9987s - [COUNTERS] Fortran Overhead ( 0 ) : 3.6137s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.3850s for 8192 events => throughput is 2.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.9135s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5714s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3421s for 8192 events => throughput is 1.89E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704259755238570E-004) differ by less than 4E-4 (3.0134411834747965e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703729438336302E-004) differ by less than 4E-4 (3.021119383106452e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793580182117605E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 42.1241s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8988s - [COUNTERS] CudaCpp MEs ( 2 ) : 37.2252s for 90112 events => throughput is 2.42E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793486626492658E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 54.1190s + [COUNTERS] Fortran Overhead ( 0 ) : 6.3214s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.7976s for 90112 events => throughput is 1.89E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793580182117605E-004) differ by less than 4E-4 (3.024668687290344e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486626492658E-004) differ by less than 4E-4 (3.0382263187522796e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.473943e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.947180e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.477038e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.947353e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704254541054809E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703722581317850E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.9623s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1144s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8479s for 8192 events => throughput is 9.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7184s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5261s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1923s for 8192 events => throughput is 6.87E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704254541054809E-004) differ by less than 4E-4 (2.8787221757475834e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722581317850E-004) differ by less than 4E-4 (2.843951981690296e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793578161882866E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 11.6999s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4337s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.2662s for 90112 events => throughput is 9.72E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793483759856148E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 16.5068s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3022s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.2046s for 90112 events => throughput is 6.82E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793578161882866E-004) differ by less than 4E-4 (2.896753368286653e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483759856148E-004) differ by less than 4E-4 (2.856718252175483e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.973910e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.978396e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.975004e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.983793e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704254166302247E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9782s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6223s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3560s for 8192 events => throughput is 2.30E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5334s for 8192 events => throughput is 1.54E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704254166302247E-004) differ by less than 4E-4 (2.8690396836061893e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722425602170E-004) differ by less than 4E-4 (2.8399286962077497e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793578009696313E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 5.8301s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9300s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.9001s for 90112 events => throughput is 2.31E+04 events/s + [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 8.5565s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6677s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8888s for 90112 events => throughput is 1.53E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793578009696313E-004) differ by less than 4E-4 (2.887117363403746e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004) differ by less than 4E-4 (2.852825495613942e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.390345e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.577584e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.577489e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 1.2916s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8193s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4723s for 8192 events => throughput is 1.73E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722425602170E-004) differ by less than 4E-4 (2.8399286962077497e-06) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 7.7633s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5957s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.1676s for 90112 events => throughput is 1.74E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004) differ by less than 4E-4 (2.852825495613942e-06) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.818661e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.382982e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.824534e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703728658657426E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 1.5274s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9418s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5856s for 8192 events => throughput is 1.40E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703728658657426E-004) differ by less than 4E-4 (3.0009745224379714e-06) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793486977281547E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 9.1749s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6987s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.4761s for 90112 events => throughput is 1.39E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486977281547E-004) differ by less than 4E-4 (3.0604373708609245e-06) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.413533e+04 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.415193e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704261630635685E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703736267486325E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.7928s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7366s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0562s for 8192 events => throughput is 1.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8657s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8443s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s 
for 8192 events => throughput is 3.83E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and hip (3.8704261630635685E-004) differ by less than 4E-4 (3.0618958697381515e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703736267486325E-004) differ by less than 4E-4 (3.1975667371675343e-06) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793580869662166E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 2.6145s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9943s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6203s for 90112 events => throughput is 1.45E+05 events/s + [XSECTION] Cross section = 0.0001579 [1.5793489323670813E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 2.8423s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2358s for 90112 events => throughput is 3.82E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and hip (1.5793580869662166E-004) differ by less than 4E-4 (3.0682019858119247e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793489323670813E-004) differ by less than 4E-4 (3.20900471706409e-06) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.469589e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.583101e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.787102e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.931306e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.703222e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.570292e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.298408e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.724382e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.698763e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.573894e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.028870e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.720622e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.692814e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.518798e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.381352e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.538253e+05 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index c20ccea3fc..9161616d22 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-05-16_20:52:51 +DATE: 2024-05-16_01:38:10 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 2.7671s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2738s - [COUNTERS] Fortran MEs ( 1 ) : 2.4933s for 8192 events => throughput is 3.29E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7461s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3596s + [COUNTERS] Fortran MEs ( 1 ) : 4.3864s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143122579712E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.7707s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2809s - [COUNTERS] Fortran MEs ( 1 ) : 2.4899s for 8192 events => throughput is 3.29E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7384s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3570s + [COUNTERS] Fortran MEs ( 1 ) : 4.3814s for 8192 events => throughput is 1.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411914656E-004] fbridge_mode=0 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 28.9717s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5892s - [COUNTERS] Fortran MEs ( 1 ) : 27.3825s for 90112 events => throughput is 3.29E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 50.4226s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1630s + [COUNTERS] Fortran MEs ( 1 ) : 48.2596s for 90112 events => throughput is 1.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143272044121E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612659176674E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 7.8348s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0029s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8319s for 8192 events => throughput is 2.14E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.4283s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8187s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.6095s for 8192 events => throughput is 1.78E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143272044121E-004) differ by less than 2E-4 (3.861716058040088e-09) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612659176674E-004) differ by less than 2E-4 (3.851690077993908e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532474032691E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 47.5780s - [COUNTERS] Fortran Overhead ( 0 ) : 5.3115s - [COUNTERS] CudaCpp MEs ( 2 ) : 42.2665s for 90112 events => throughput is 2.13E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438704534934E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 56.9207s + [COUNTERS] Fortran Overhead ( 0 ) : 6.6165s + [COUNTERS] CudaCpp MEs ( 2 ) : 50.3042s for 90112 events => throughput is 1.79E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532474032691E-004) differ by less than 2E-4 (3.933131154099101e-09) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438704534934E-004) differ by less than 2E-4 (3.930950231989527e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.211221e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.845749e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.210147e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.855084e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143304774347E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612692816703E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 3.5369s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8882s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.6487s for 8192 events => throughput is 4.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0818s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6854s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3963s for 8192 events => throughput is 3.42E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143304774347E-004) differ by less than 2E-4 (4.707367828871156e-09) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612692816703E-004) differ by less than 2E-4 (4.720860369289426e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532476698221E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 21.2864s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1918s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.0946s for 90112 events => throughput is 4.98E+03 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438707226035E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 30.7891s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4469s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3422s for 90112 events => throughput is 3.42E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532476698221E-004) differ by less than 2E-4 (4.101904815811963e-09) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438707226035E-004) differ by less than 2E-4 (4.1013439311399225e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.124073e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.522464e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.105635e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.523204e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143287857844E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.6358s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9511s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6847s for 8192 events => throughput is 1.20E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3962s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3623s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0339s for 8192 events => throughput is 7.92E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and cpp (3.8704143287857844E-004) differ by less than 2E-4 (4.2702956726259345e-09) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532473043530E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 9.7635s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2583s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.5053s for 90112 events => throughput is 1.20E+04 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 14.5036s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1328s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.3708s for 90112 events => throughput is 7.92E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and cpp (1.5793532473043530E-004) differ by less than 2E-4 (3.870500364655527e-09) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.234140e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.125066e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.074324e+03 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 2.1502s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2343s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9159s for 8192 events => throughput is 8.94E+03 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 12.9975s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0012s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.9962s for 90112 events => throughput is 9.01E+03 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.351869e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.234326e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.208773e+03 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 121 events (found 923 events) + [COUNTERS] PROGRAM TOTAL : 2.7414s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5394s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2020s for 8192 events => throughput is 6.82E+03 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 16.5337s + [COUNTERS] Fortran Overhead ( 0 ) : 3.3003s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.2334s for 90112 events => throughput is 6.81E+03 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.881370e+03 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.943865e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8704143124638075E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612512203166E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.9328s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8187s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1141s for 8192 events => throughput is 7.18E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9014s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8685s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s 
for 8192 events => throughput is 2.49E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8704143122579712E-004) and hip (3.8704143124638075E-004) differ by less than 2E-4 (5.318190332559425e-11) +OK! xsec from fortran (3.8703612510102356E-004) and cuda (3.8703612512203166E-004) differ by less than 2E-4 (5.427946980773868e-11) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793532411887058E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 1358 events (found 1880 events) - [COUNTERS] PROGRAM TOTAL : 3.3216s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0701s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2516s for 90112 events => throughput is 7.20E+04 events/s + [XSECTION] Cross section = 0.0001579 [1.5793438642387717E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 1361 events (found 1881 events) + [COUNTERS] PROGRAM TOTAL : 2.9899s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6262s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3637s for 90112 events => throughput is 2.48E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793532411914656E-004) and hip (1.5793532411887058E-004) differ by less than 2E-4 (1.7474910407599964e-12) +OK! xsec from fortran (1.5793438642451712E-004) and cuda (1.5793438642387717E-004) differ by less than 2E-4 (4.051980972974434e-12) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.192995e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.280457e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.462358e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.523385e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.248895e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.121733e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.020189e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.162091e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.244252e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.125438e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.225383e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.168234e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.244523e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.125929e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.379421e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.451563e+05 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 193cac0b99..f87c8c9cf1 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_20:42:12 +DATE: 2024-05-16_01:43:56 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 54.5168s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3652s - [COUNTERS] Fortran MEs ( 1 ) : 54.1516s for 8192 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 101.7235s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5049s + [COUNTERS] Fortran MEs ( 1 ) : 101.2186s for 8192 events => throughput is 8.09E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 54.6451s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3696s - [COUNTERS] Fortran MEs ( 1 ) : 54.2755s for 8192 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 101.7703s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5109s + [COUNTERS] Fortran MEs ( 1 ) : 101.2594s for 8192 events => throughput is 8.09E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085419E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 600.0134s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0257s - [COUNTERS] Fortran MEs ( 1 ) : 596.9877s for 90112 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 1119.4272s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4297s + [COUNTERS] Fortran MEs ( 1 ) : 1114.9976s for 90112 events => throughput is 8.08E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942015001E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 175.8983s - [COUNTERS] Fortran Overhead ( 0 ) : 80.1134s - [COUNTERS] CudaCpp MEs ( 2 ) : 95.7848s for 8192 events => throughput is 8.55E+01 events/s + [COUNTERS] PROGRAM TOTAL : 236.4243s + [COUNTERS] Fortran Overhead ( 0 ) : 108.7156s + [COUNTERS] CudaCpp MEs ( 2 ) : 127.7087s for 8192 events => throughput is 6.41E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403628942015001E-006) differ by less than 3E-14 (2.4424906541753444e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939193E-006) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085453E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1134.7119s - [COUNTERS] Fortran Overhead ( 0 ) : 82.6410s - [COUNTERS] CudaCpp MEs ( 2 ) : 1052.0709s for 90112 events => throughput is 8.57E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1507.2346s + [COUNTERS] Fortran Overhead ( 0 ) : 112.2012s + [COUNTERS] CudaCpp MEs ( 2 ) : 1395.0334s for 90112 events => throughput is 6.46E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783648085453E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.033544e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.611342e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.031382e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.612518e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942015003E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 81.8493s - [COUNTERS] Fortran Overhead ( 0 ) : 36.9991s - [COUNTERS] CudaCpp MEs ( 2 ) : 44.8502s for 8192 events => throughput is 1.83E+02 events/s + [COUNTERS] PROGRAM TOTAL : 114.8253s + [COUNTERS] Fortran Overhead ( 0 ) : 52.9129s + [COUNTERS] CudaCpp MEs ( 2 ) : 61.9124s for 8192 events => throughput is 1.32E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403628942015003E-006) differ by less than 3E-14 (2.6645352591003757e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939197E-006) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085448E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 531.7480s - [COUNTERS] Fortran Overhead ( 0 ) : 39.7110s - [COUNTERS] CudaCpp MEs ( 2 ) : 492.0370s for 90112 events => throughput is 1.83E+02 events/s + [COUNTERS] PROGRAM TOTAL : 734.6001s + [COUNTERS] Fortran Overhead ( 0 ) : 56.8950s + [COUNTERS] CudaCpp MEs ( 2 ) : 677.7051s for 90112 events => throughput is 1.33E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783648085448E-007) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656017E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.254594e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.573216e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.249864e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.570652e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942015001E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 35.4827s - [COUNTERS] Fortran Overhead ( 0 ) : 16.2185s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.2643s for 8192 events => throughput is 4.25E+02 events/s + [COUNTERS] PROGRAM TOTAL : 53.5594s + [COUNTERS] Fortran Overhead ( 0 ) : 24.8692s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.6902s for 8192 events => throughput is 2.86E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403628942015001E-006) differ by less than 3E-14 (2.4424906541753444e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085445E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 231.8248s - [COUNTERS] Fortran Overhead ( 0 ) : 18.8357s - [COUNTERS] CudaCpp MEs ( 2 ) : 212.9890s for 90112 events => throughput is 4.23E+02 events/s + [COUNTERS] PROGRAM TOTAL : 345.8816s + [COUNTERS] Fortran Overhead ( 0 ) : 28.6165s + [COUNTERS] CudaCpp MEs ( 2 ) : 317.2651s for 90112 events => throughput is 2.84E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783648085445E-007) differ by less than 3E-14 (1.1102230246251565e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.171079e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.346027e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.397864e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 47.5433s + [COUNTERS] Fortran Overhead ( 0 ) : 21.7991s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.7442s for 8192 events => throughput is 3.18E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 310.2994s + [COUNTERS] Fortran Overhead ( 0 ) : 25.7446s + [COUNTERS] CudaCpp MEs ( 2 ) : 284.5548s for 90112 events => throughput is 3.17E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.866314e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.226924e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.859864e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 50.4926s + [COUNTERS] Fortran Overhead ( 0 ) : 24.7479s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.7447s for 8192 events => throughput is 3.18E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 313.6701s + [COUNTERS] Fortran Overhead ( 0 ) : 28.5625s + [COUNTERS] CudaCpp MEs ( 2 ) : 285.1075s for 90112 events => throughput is 3.16E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.394651e+02 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.384790e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942015003E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 11.7059s - [COUNTERS] Fortran Overhead ( 0 ) : 7.8191s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8867s for 8192 events => throughput is 2.11E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.2708s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1879s + [COUNTERS] CudaCpp MEs ( 2 ) : 
1.0829s for 8192 events => throughput is 7.56E+03 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and hip (1.2403628942015003E-006) differ by less than 3E-14 (2.6645352591003757e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085437E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 52.7736s - [COUNTERS] Fortran Overhead ( 0 ) : 10.2335s - [COUNTERS] CudaCpp MEs ( 2 ) : 42.5400s for 90112 events => throughput is 2.12E+03 events/s + [COUNTERS] PROGRAM TOTAL : 19.1407s + [COUNTERS] Fortran Overhead ( 0 ) : 7.1896s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9511s for 90112 events => throughput is 7.54E+03 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and hip (2.3322783648085437E-007) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993086656006E-007) differ by less than 3E-14 (1.7763568394002505e-15) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.152133e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.518899e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.191143e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.266687e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.527095e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.285867e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.455198e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.577065e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.518810e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.302340e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.486020e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.485177e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.521294e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.239249e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.125470e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.236704e+03 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index a47883d8b6..9938780c0a 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_21:37:36 +DATE: 2024-05-16_03:16:41 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 54.4965s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3691s - [COUNTERS] Fortran MEs ( 1 ) : 54.1274s for 8192 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 101.6786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5071s + [COUNTERS] Fortran MEs ( 1 ) : 101.1715s for 8192 events => throughput is 8.10E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 54.3426s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3756s - [COUNTERS] Fortran MEs ( 1 ) : 53.9670s for 8192 events => throughput is 1.52E+02 events/s + [COUNTERS] PROGRAM TOTAL : 102.1420s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5108s + [COUNTERS] Fortran MEs ( 1 ) : 101.6312s for 8192 events => throughput is 8.06E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085419E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 598.8564s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0254s - [COUNTERS] Fortran MEs ( 1 ) : 595.8311s for 90112 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 1119.6489s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4200s + [COUNTERS] Fortran MEs ( 1 ) : 1115.2289s for 90112 events => throughput is 8.08E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,23 +124,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405363572559468E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405719957040752E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 162.8380s - [COUNTERS] Fortran Overhead ( 0 ) : 74.6711s - [COUNTERS] CudaCpp MEs ( 2 ) : 88.1669s for 8192 events => throughput is 9.29E+01 events/s + [COUNTERS] PROGRAM TOTAL : 207.9761s + [COUNTERS] Fortran Overhead ( 0 ) : 95.5518s + [COUNTERS] CudaCpp MEs ( 2 ) : 112.4243s for 8192 events => throughput is 7.29E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2405363572559468E-006) differ by less than 4E-4 (0.00013984863241267576) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719957040752E-006) differ by less than 4E-4 (0.00013985256106807675) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -158,37 +158,37 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326080615569212E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326290771198648E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1045.1681s - [COUNTERS] Fortran Overhead ( 0 ) : 77.2023s - [COUNTERS] CudaCpp MEs ( 2 ) : 967.9658s for 90112 events => throughput is 9.31E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1342.0233s + [COUNTERS] Fortran Overhead ( 0 ) : 99.5419s + [COUNTERS] CudaCpp MEs ( 2 ) : 1242.4814s for 90112 events => throughput is 7.25E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3326080615569212E-007) differ by less than 4E-4 (0.00014136252059526733) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326290771198648E-007) differ by less than 4E-4 (0.00014139199589124907) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108327e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.627892e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.104949e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.617246e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,23 +202,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405361288903015E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405717007921116E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 39.8100s - [COUNTERS] Fortran Overhead ( 0 ) : 18.3061s - [COUNTERS] CudaCpp MEs ( 2 ) : 21.5039s for 8192 events => throughput is 3.81E+02 events/s + [COUNTERS] PROGRAM TOTAL : 52.4912s + [COUNTERS] Fortran Overhead ( 0 ) : 24.8093s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.6819s for 8192 events => throughput is 2.96E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2405361288903015E-006) differ by less than 4E-4 (0.0001396645204514435) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405717007921116E-006) differ by less than 4E-4 (0.00013961480525170877) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -236,37 +236,37 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326076878598447E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326284900828787E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 256.1064s - [COUNTERS] Fortran Overhead ( 0 ) : 21.0308s - [COUNTERS] CudaCpp MEs ( 2 ) : 235.0755s for 90112 events => throughput is 3.83E+02 events/s + [COUNTERS] PROGRAM TOTAL : 333.5578s + [COUNTERS] Fortran Overhead ( 0 ) : 28.7441s + [COUNTERS] CudaCpp MEs ( 2 ) : 304.8137s for 90112 events => throughput is 2.96E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3326076878598447E-007) differ by less than 4E-4 (0.00014120229226155523) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326284900828787E-007) differ by less than 4E-4 (0.00014114029707035236) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.607312e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.354565e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.620181e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.352919e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -280,23 +280,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405360895331841E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 18.0753s - [COUNTERS] Fortran Overhead ( 0 ) : 8.3409s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.7343s for 8192 events => throughput is 8.42E+02 events/s + [COUNTERS] PROGRAM TOTAL : 26.9353s + [COUNTERS] Fortran Overhead ( 0 ) : 12.5805s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.3549s for 8192 events => throughput is 5.71E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2405360895331841E-006) differ by less than 4E-4 (0.00013963279012663143) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716659252656E-006) differ by less than 4E-4 (0.00013958669586155992) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -314,45 +314,195 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326069099562333E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 117.1292s - [COUNTERS] Fortran Overhead ( 0 ) : 11.0028s - [COUNTERS] CudaCpp MEs ( 2 ) : 106.1264s for 90112 events => throughput is 8.49E+02 events/s + [COUNTERS] PROGRAM TOTAL : 175.5244s + [COUNTERS] Fortran Overhead ( 0 ) : 16.6137s + [COUNTERS] CudaCpp MEs ( 2 ) : 158.9107s for 90112 events => throughput is 5.67E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3326069099562333E-007) differ by less than 4E-4 (0.00014086875419705436) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007) differ by less than 4E-4 (0.00014080311959907554) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038176e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.796719e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.813418e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 24.0035s + [COUNTERS] Fortran Overhead ( 0 ) : 11.1136s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.8899s for 8192 events => throughput is 6.36E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716659252656E-006) differ by less than 4E-4 (0.00013958669586155992) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 155.2962s + [COUNTERS] Fortran Overhead ( 0 ) : 15.0900s + [COUNTERS] CudaCpp MEs ( 2 ) : 140.2063s for 90112 events => throughput is 6.43E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007) differ by less than 4E-4 (0.00014080311959907554) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.795563e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.039143e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.781198e+02 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.241e-06 [1.2405719306052570E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 25.3167s + [COUNTERS] Fortran Overhead ( 0 ) : 12.6125s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.7041s for 8192 events => throughput is 6.45E+02 events/s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719306052570E-006) differ by less than 4E-4 (0.00013980007888836354) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.333e-07 [2.3326283660088769E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 157.3576s + [COUNTERS] Fortran Overhead ( 0 ) : 16.6484s + [COUNTERS] CudaCpp MEs ( 2 ) : 140.7092s for 90112 events => throughput is 6.40E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326283660088769E-007) differ by less than 4E-4 (0.00014108709892313165) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.841550e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.830427e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -364,28 +514,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405363557292459E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405722175509512E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 6.2636s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4305s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8331s for 8192 events => throughput is 4.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5511s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0591s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4920s for 8192 events => throughput is 1.66E+04 events/s 
-*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and hip (1.2405363557292459E-006) differ by less than 4E-4 (0.00013984740156258724) +OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2405722175509512E-006) differ by less than 4E-4 (0.00014003141235829908) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -397,65 +547,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326074784076956E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326296967941821E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 27.0950s - [COUNTERS] Fortran Overhead ( 0 ) : 6.8776s - [COUNTERS] CudaCpp MEs ( 2 ) : 20.2175s for 90112 events => throughput is 4.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 11.4353s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0032s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4321s for 90112 events => throughput is 1.66E+04 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and hip (2.3326074784076956E-007) differ by less than 4E-4 (0.00014111248645076735) +OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3326296967941821E-007) differ by less than 4E-4 (0.0001416576883412901) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.435238e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.630624e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.502637e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.646596e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.290043e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.329013e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.455740e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.359221e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.270194e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.329144e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.000099e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.339287e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.276751e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.285838e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.072297e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.423096e+03 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 8b9cb02837..9cddd5fe7c 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-05-16_22:21:51 +DATE: 2024-05-16_04:26:13 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 54.5583s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3675s - [COUNTERS] Fortran MEs ( 1 ) : 54.1908s for 8192 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 101.9892s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5028s + [COUNTERS] Fortran MEs ( 1 ) : 101.4864s for 8192 events => throughput is 8.07E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628942014972E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 54.5792s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4048s - [COUNTERS] Fortran MEs ( 1 ) : 54.1744s for 8192 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 101.7400s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5055s + [COUNTERS] Fortran MEs ( 1 ) : 101.2345s for 8192 events => throughput is 8.09E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783648085419E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 597.8493s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0318s - [COUNTERS] Fortran MEs ( 1 ) : 594.8175s for 90112 events => throughput is 1.51E+02 events/s + [COUNTERS] PROGRAM TOTAL : 1119.6356s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4254s + [COUNTERS] Fortran MEs ( 1 ) : 1115.2102s for 90112 events => throughput is 8.08E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403629013416990E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 175.7021s - [COUNTERS] Fortran Overhead ( 0 ) : 80.5963s - [COUNTERS] CudaCpp MEs ( 2 ) : 95.1058s for 8192 events => throughput is 8.61E+01 events/s + [COUNTERS] PROGRAM TOTAL : 229.4020s + [COUNTERS] Fortran Overhead ( 0 ) : 103.2152s + [COUNTERS] CudaCpp MEs ( 2 ) : 126.1868s for 8192 events => throughput is 6.49E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403629013416990E-006) differ by less than 2E-4 (5.7565425759520394e-09) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985299359844E-006) differ by less than 2E-4 (5.7578810608305275e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783773791503E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1136.0885s - [COUNTERS] Fortran Overhead ( 0 ) : 83.3490s - [COUNTERS] CudaCpp MEs ( 2 ) : 1052.7395s for 90112 events => throughput is 8.56E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1528.1049s + [COUNTERS] Fortran Overhead ( 0 ) : 113.9982s + [COUNTERS] CudaCpp MEs ( 2 ) : 1414.1067s for 90112 events => throughput is 6.37E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783773791503E-007) differ by less than 2E-4 (5.389840573855054e-09) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993212353001E-007) differ by less than 2E-4 (5.389404034161771e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.026349e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.425842e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.024407e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.948869e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403629009850969E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 79.2661s - [COUNTERS] Fortran Overhead ( 0 ) : 35.2128s - [COUNTERS] CudaCpp MEs ( 2 ) : 44.0533s for 8192 events => throughput is 1.86E+02 events/s + [COUNTERS] PROGRAM TOTAL : 117.4242s + [COUNTERS] Fortran Overhead ( 0 ) : 53.6967s + [COUNTERS] CudaCpp MEs ( 2 ) : 63.7275s for 8192 events => throughput is 1.29E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403629009850969E-006) differ by less than 2E-4 (5.469044328521022e-09) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985295828471E-006) differ by less than 2E-4 (5.473184350179849e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783784120318E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 520.4130s - [COUNTERS] Fortran Overhead ( 0 ) : 37.9407s - [COUNTERS] CudaCpp MEs ( 2 ) : 482.4723s for 90112 events => throughput is 1.87E+02 events/s + [COUNTERS] PROGRAM TOTAL : 756.7451s + [COUNTERS] Fortran Overhead ( 0 ) : 57.7650s + [COUNTERS] CudaCpp MEs ( 2 ) : 698.9802s for 90112 events => throughput is 1.29E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783784120318E-007) differ by less than 2E-4 (5.832704319530535e-09) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222645653E-007) differ by less than 2E-4 (5.830713245558172e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.333934e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.540886e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.350214e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.526888e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403629007633195E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 34.4462s - [COUNTERS] Fortran Overhead ( 0 ) : 15.3748s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.0714s for 8192 events => throughput is 4.30E+02 events/s + [COUNTERS] PROGRAM TOTAL : 51.3666s + [COUNTERS] Fortran Overhead ( 0 ) : 23.6472s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7194s for 8192 events => throughput is 2.96E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and cpp (1.2403629007633195E-006) differ by less than 2E-4 (5.290244020628165e-09) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783783946155E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 227.1590s - [COUNTERS] Fortran Overhead ( 0 ) : 17.9408s - [COUNTERS] CudaCpp MEs ( 2 ) : 209.2182s for 90112 events => throughput is 4.31E+02 events/s + [COUNTERS] PROGRAM TOTAL : 334.4015s + [COUNTERS] Fortran Overhead ( 0 ) : 27.7321s + [COUNTERS] CudaCpp MEs ( 2 ) : 306.6693s for 90112 events => throughput is 2.94E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and cpp (2.3322783783946155E-007) differ by less than 2E-4 (5.825236737422301e-09) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.444423e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.517938e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.525606e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 44.9641s + [COUNTERS] Fortran Overhead ( 0 ) : 20.5328s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.4313s for 8192 events => throughput is 3.35E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 294.2131s + [COUNTERS] Fortran Overhead ( 0 ) : 24.5260s + [COUNTERS] CudaCpp MEs ( 2 ) : 269.6871s for 90112 events => throughput is 3.34E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.118767e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.457069e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.099496e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 70 events (found 407 events) + [COUNTERS] PROGRAM TOTAL : 49.6272s + [COUNTERS] Fortran Overhead ( 0 ) : 23.9735s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.6537s for 8192 events => throughput is 3.19E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 303 events (found 1531 events) + [COUNTERS] PROGRAM TOTAL : 308.1385s + [COUNTERS] Fortran Overhead ( 0 ) : 28.1125s + [COUNTERS] CudaCpp MEs ( 2 ) : 280.0260s for 90112 events => throughput is 3.22E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.494217e+02 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.498492e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403628931370709E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 12.3847s - [COUNTERS] Fortran Overhead ( 0 ) : 8.1395s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2452s for 8192 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6127s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7479s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.8648s for 8192 events => throughput is 9.47E+03 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403628942014972E-006) and hip (1.2403628931370709E-006) differ by less than 2E-4 (8.581571009358413e-10) +OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985217419736E-006) differ by less than 2E-4 (8.480691704448873e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322783640044522E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 57.3104s - [COUNTERS] Fortran Overhead ( 0 ) : 10.5772s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.7332s for 90112 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.2177s + [COUNTERS] Fortran Overhead ( 0 ) : 6.7356s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4821s for 90112 events => throughput is 9.50E+03 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322783648085419E-007) and hip (2.3322783640044522E-007) differ by less than 2E-4 (3.447657714872321e-10) +OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993078576733E-007) differ by less than 2E-4 (3.464063480507207e-10) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.938523e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.422089e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.964520e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.074505e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.238719e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.108350e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.382190e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.160591e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.292322e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.110190e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.191145e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.113742e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.289765e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.112799e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.081215e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.647292e+03 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 345e75f604..c909267a2d 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z 
-make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_20:56:31 +DATE: 2024-05-16_01:42:38 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4335s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s - [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4817s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4067s + [COUNTERS] Fortran MEs ( 1 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3071s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2590s - [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4101s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3363s + [COUNTERS] Fortran MEs ( 1 ) : 0.0738s for 8192 events => throughput is 1.11E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7403s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2151s - [COUNTERS] Fortran MEs ( 1 ) : 0.5252s for 90112 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3997s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5919s + [COUNTERS] Fortran MEs ( 1 ) : 0.8079s for 90112 events => throughput is 1.12E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4230s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3528s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0702s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4922s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4133s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0789s for 8192 events => throughput is 1.04E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226551166922) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263335) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343820] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686556561293] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0612s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2886s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7726s for 90112 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5284s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6490s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8794s for 90112 events => throughput is 1.02E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679754343820) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561293) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.206967e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.038604e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.209538e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.042402e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166122] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539351262530] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3320s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2981s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 8192 events => throughput is 2.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3775s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226551166122) differ by less than 3E-14 (2.9531932455029164e-14) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351262530) differ by less than 3E-14 (2.9531932455029164e-14) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6268s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2529s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3739s for 90112 events => throughput is 2.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0947s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6149s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4799s for 90112 events => throughput is 1.88E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679754343823) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561281) differ by less than 3E-14 (6.661338147750939e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.436246e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.949373e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.444759e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.919896e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2992s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2817s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3846s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3601s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.35E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226551166922) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.4300s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2395s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1905s for 90112 events => throughput is 4.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8777s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6020s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2756s for 90112 events => throughput is 3.27E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679754343823) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.830686e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.197689e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.794875e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.341628e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.3821s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3591s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.8414s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5939s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2475s for 90112 events => throughput is 3.64E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.543777e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.667139e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,30 +428,176 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' -Memory access fault by GPU node-4 (Agent handle: 0x63ef410) on address 0x1476e6b2a000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x147a7d01b90f in ??? -#1 0x147a7ce58d2b in ??? -#2 0x147a7ce5a3e4 in ??? -#3 0x147a74421b64 in ??? -#4 0x147a7441eb38 in ??? -#5 0x147a743dc496 in ??? -#6 0x147a7d00f6e9 in ??? -#7 0x147a7cf2649e in ??? -#8 0xffffffffffffffff in ??? -./madX.sh: line 379: 45053 Aborted $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. - Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.4059s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0340s for 8192 events => throughput is 2.41E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0051s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6219s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3832s for 90112 events => throughput is 2.35E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.349902e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.363260e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7762s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7756s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.27110539351263330) and cuda (0.27110539351263363) differ by less than 3E-14 (1.3322676295501878e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0116s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0036s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 90112 events => throughput is 1.13E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686556561304) differ by less than 3E-14 (4.440892098500626e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.582112e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.121699e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.529721e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.531629e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.538677e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.807286e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.530045e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.783374e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 1ca71b10fc..4ac5ec3dc1 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_20:56:44 +DATE: 2024-05-16_13:45:22 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.3556s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3075s - [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4107s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3109s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2628s - [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4161s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7401s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2147s - [COUNTERS] Fortran MEs ( 1 ) : 0.5255s for 90112 events => throughput is 1.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4361s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6155s + [COUNTERS] Fortran MEs ( 1 ) : 0.8206s for 90112 events => throughput is 1.10E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110149549279866] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110463093540638] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3780s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3208s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0572s for 8192 events => throughput is 1.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4907s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4145s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0762s for 8192 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110149549279866) differ by less than 4E-4 (2.840326210895583e-06) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110463093540638) differ by less than 4E-4 (2.812844174915341e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510678843355344] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686273216112] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8985s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2738s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6247s for 90112 events => throughput is 1.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5059s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6664s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8394s for 90112 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510678843355344) differ by less than 4E-4 (4.2350520312872675e-08) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686273216112) differ by less than 4E-4 (1.3172298474195543e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.471741e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.092440e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.471384e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.094603e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110146988852984] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110459152958460] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3043s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2842s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 8192 events => throughput is 4.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3934s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110146988852984) differ by less than 4E-4 (2.934771267448788e-06) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110459152958460) differ by less than 4E-4 (2.9581965829139634e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510676993136629] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510683016166510] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.4572s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2368s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2204s for 90112 events => throughput is 4.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9232s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6228s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3004s for 90112 events => throughput is 3.00E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510676993136629) differ by less than 4E-4 (1.2836447871311663e-07) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510683016166510) differ by less than 4E-4 (1.6458771667782202e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.255154e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.039638e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.282301e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.034015e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110148793566186] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.2823s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2727s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 8192 events => throughput is 8.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3713s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.04E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110148793566186) differ by less than 4E-4 (2.8682018052839098e-06) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460595003461) differ by less than 4E-4 (2.9050052766654844e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510676419088856] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.3362s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2299s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1064s for 90112 events => throughput is 8.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7502s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6011s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1490s for 90112 events => throughput is 6.05E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510676419088856) differ by less than 4E-4 (1.5505111905511626e-07) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ by less than 4E-4 (1.8848637739488083e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.680750e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.923571e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.758171e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.853443e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.3638s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3514s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 8192 events => throughput is 6.61E+05 events/s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460595003461) differ by less than 4E-4 (2.9050052766654844e-06) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.7444s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6060s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1385s for 90112 events => throughput is 6.51E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ by less than 4E-4 (1.8848637739488083e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.334346e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.476144e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,30 +428,178 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' -Memory access fault by GPU node-4 (Agent handle: 0x63ef430) on address 0x145d8d504000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x14612399790f in ??? -#1 0x1461237d4d2b in ??? -#2 0x1461237d63e4 in ??? -#3 0x14611ad9db64 in ??? -#4 0x14611ad9ab38 in ??? -#5 0x14611ad58496 in ??? -#6 0x14612398b6e9 in ??? -#7 0x1461238a249e in ??? -#8 0xffffffffffffffff in ??? -./madX.sh: line 379: 46236 Aborted $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. - Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110464176080312] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.3762s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3583s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0178s for 8192 events => throughput is 4.59E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.27110539351263330) and cpp (0.27110464176080312) differ by less than 4E-4 (2.772913590631809e-06) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510685411522326] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.8096s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1967s for 90112 events => throughput is 4.58E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.21510686556561295) and cpp (0.21510685411522326) differ by less than 4E-4 (5.3231167917999755e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.750606e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.766894e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110478167944563] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7793s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.48E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cuda (0.27110478167944563) differ by less than 4E-4 (2.2568093527297606e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510689885789414] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0413s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0348s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689885789414) differ by less than 4E-4 (1.547708907700951e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.566939e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.326602e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.593547e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.720103e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.619232e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.806222e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.144615e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.016256e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 0b8f6a32df..23f8d1233a 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-05-16_20:56:57 +DATE: 2024-05-16_01:43:25 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.3529s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3048s - [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s + [COUNTERS] Fortran MEs ( 1 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226551166922] fbridge_mode=0 + [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3100s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2620s - [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4171s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3424s + [COUNTERS] Fortran MEs ( 1 ) : 0.0747s for 8192 events => throughput is 1.10E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679754343823] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7393s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2140s - [COUNTERS] Fortran MEs ( 1 ) : 0.5252s for 90112 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4382s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6232s + [COUNTERS] Fortran MEs ( 1 ) : 0.8150s for 90112 events => throughput is 1.11E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226549005623] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4032s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3341s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0692s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5038s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4232s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 8192 events => throughput is 1.02E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226549005623) differ by less than 2E-4 (7.972267290767832e-11) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539348916002) differ by less than 2E-4 (8.658362915525686e-11) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679758658835] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686560794337] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0516s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2896s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7620s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5722s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6823s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8899s for 90112 events => throughput is 1.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679758658835) differ by less than 2E-4 (2.0059864880295208e-10) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794337) differ by less than 2E-4 (1.967879192932287e-10) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.200794e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.030982e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.203149e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.031715e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226549005628] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3316s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2985s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0331s for 8192 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4234s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3811s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226549005628) differ by less than 2E-4 (7.972245086307339e-11) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539348916002) differ by less than 2E-4 (8.658362915525686e-11) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679758658832] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686560794334] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6178s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2527s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3650s for 90112 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1135s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6453s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4682s for 90112 events => throughput is 1.92E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679758658832) differ by less than 2E-4 (2.0059842675834716e-10) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794334) differ by less than 2E-4 (1.9678769724862377e-10) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.461318e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.925046e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.467281e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.926756e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110226530029391] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3025s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2850s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 8192 events => throughput is 4.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3913s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3660s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 8192 events => throughput is 3.24E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110226551166922) and cpp (0.27110226530029391) differ by less than 2E-4 (7.796884249344771e-10) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330272815) differ by less than 2E-4 (7.742566587864985e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510679756340242] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.4301s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2388s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1913s for 90112 events => throughput is 4.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8962s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6193s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2769s for 90112 events => throughput is 3.25E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510679754343823) and cpp (0.21510679756340242) differ by less than 2E-4 (9.281064805577444e-11) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.695225e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.266154e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.795904e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.269502e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.3831s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3610s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330272815) differ by less than 2E-4 (7.742566587864985e-10) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.8418s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5992s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2426s for 90112 events => throughput is 3.71E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.789065e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.854777e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,30 +428,177 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' -Memory access fault by GPU node-4 (Agent handle: 0x63ef410) on address 0x149212efa000. Reason: Page not present or supervisor privilege. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x1495a93d390f in ??? -#1 0x1495a9210d2b in ??? -#2 0x1495a92123e4 in ??? -#3 0x1495a07d9b64 in ??? -#4 0x1495a07d6b38 in ??? -#5 0x1495a0794496 in ??? -#6 0x1495a93c76e9 in ??? -#7 0x1495a92de49e in ??? -#8 0xffffffffffffffff in ??? -./madX.sh: line 379: 47427 Aborted $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. - Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.4096s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3737s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0358s for 8192 events => throughput is 2.29E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.27110539351263330) and cpp (0.27110539330272815) differ by less than 2E-4 (7.742566587864985e-10) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0275s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6170s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4105s for 90112 events => throughput is 2.20E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.335204e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.335548e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7731s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7724s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.27110539351263330) and cuda (0.27110539343558537) differ by less than 2E-4 (2.8419910869104115e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 2.0091s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0010s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.12E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686553631395) differ by less than 2E-4 (1.3620671257541517e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.632055e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.046951e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.534300e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.533151e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.529948e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.832656e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.532773e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.792603e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index b976ca012f..7b51bb9221 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_21:01:14 +DATE: 2024-05-16_05:58:47 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 1.0496s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0170s - [COUNTERS] Fortran MEs ( 1 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9450s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8961s + [COUNTERS] Fortran MEs ( 1 ) : 0.0489s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5952s - [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4283s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3794s + [COUNTERS] Fortran MEs ( 1 ) : 0.0489s for 8192 events => throughput is 1.67E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377502] fbridge_mode=0 + [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.7668s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4120s - [COUNTERS] Fortran MEs ( 1 ) : 0.3548s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8593s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3225s + [COUNTERS] Fortran MEs ( 1 ) : 0.5368s for 90112 events => throughput is 1.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256197] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6698s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6345s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0353s for 8192 events => throughput is 2.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955499256197) differ by less than 3E-14 (4.6629367034256575e-15) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256148) differ by less than 3E-14 (6.661338147750939e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.8230s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4359s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3870s for 90112 events => throughput is 2.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8316s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4781s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895240377569) differ by less than 3E-14 (3.3306690738754696e-15) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.414981e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.935091e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.407163e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.955688e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256205] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6333s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6138s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0194s for 8192 events => throughput is 4.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4208s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3966s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955499256205) differ by less than 3E-14 (4.218847493575595e-15) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256161) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.6418s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4278s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2140s for 90112 events => throughput is 4.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6048s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3350s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2698s for 90112 events => throughput is 3.34E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895240377569) differ by less than 3E-14 (3.3306690738754696e-15) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.251157e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.396840e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.285398e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.435183e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256205] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6167s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6059s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4008s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3863s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.65E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955499256205) differ by less than 3E-14 (4.218847493575595e-15) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,114 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.5279s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4098s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1181s for 90112 events => throughput is 7.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5134s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3430s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1705s for 90112 events => throughput is 5.29E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895240377564) differ by less than 3E-14 (3.1086244689504383e-15) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.922831e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.293625e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.953876e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.214602e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.3999s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.10E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 + [UNWEIGHT] Wrote 1818 events (found 1823 events) + [COUNTERS] PROGRAM TOTAL : 1.4652s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1469s for 90112 events => throughput is 6.13E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.964595e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.873725e+05 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +428,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256223] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8891s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8881s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 7.92E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4159s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3941s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and hip (2.0162955499256223) differ by less than 3E-14 (3.3306690738754696e-15) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +461,143 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377577] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.6970s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6858s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 90112 events => throughput is 8.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.5767s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3320s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2446s for 90112 events => throughput is 3.68E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.692127e+05 ) sec^-1 -OK! xsec from fortran (2.0434895240377502) and hip (2.0434895240377577) differ by less than 3E-14 (3.774758283725532e-15) +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.804832e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8067s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8061s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.36E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955499256165) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1 + [UNWEIGHT] Wrote 1818 events (found 1823 events) + [COUNTERS] PROGRAM TOTAL : 1.7545s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 90112 events => throughput is 1.29E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0434895240377569) and cuda (2.0434895240377573) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.522443e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.804334e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.937728e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.230995e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.540081e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.136468e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.483834e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.810257e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 
14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.539468e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.115441e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.497797e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.048562e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.500298e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.115501e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.024295e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.748994e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index f1ee5d8619..d09b81d7d3 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_21:01:52 +DATE: 2024-05-16_05:59:14 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9753s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9428s - [COUNTERS] Fortran MEs ( 1 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9433s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8939s + [COUNTERS] Fortran MEs ( 1 ) : 0.0495s for 8192 events => throughput is 1.66E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6342s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6017s - [COUNTERS] Fortran MEs ( 1 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4256s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3772s + [COUNTERS] Fortran MEs ( 1 ) : 0.0484s for 8192 events => throughput is 1.69E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377502] fbridge_mode=0 + [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.7687s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4136s - [COUNTERS] Fortran MEs ( 1 ) : 0.3551s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8568s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3199s + [COUNTERS] Fortran MEs ( 1 ) : 0.5369s for 90112 events => throughput is 1.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,43 +124,43 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162897089316618] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162897371946169] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1625 events) - [COUNTERS] PROGRAM TOTAL : 0.6580s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6273s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0307s for 8192 events => throughput is 2.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4525s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4113s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.99E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162897089316618) differ by less than 4E-4 (2.8968937452189536e-06) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162897371946169) differ by less than 4E-4 (2.8828764708777044e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! 
-diff /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 6206,6207c6206,6207 -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081269E+01 0.59936081269E+01 0.00000000000E+00 0. -1. -< 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304668E+03 0.48080583909E+03 0.47000000000E+01 0. 1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01 0.59936081260E+01 0.00000000000E+00 0. -1. +< 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304676E+03 0.48080583916E+03 0.47000000000E+01 0. 1. --- -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081269E+01 0.59936081269E+01 0.00000000000E+00 0. 1. -> 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304668E+03 0.48080583909E+03 0.47000000000E+01 0. -1. +> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01 0.59936081260E+01 0.00000000000E+00 0. 1. +> 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304676E+03 0.48080583916E+03 0.47000000000E+01 0. -1. 8306,8307c8306,8307 -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997230E+02 0.23857997230E+02 0.00000000000E+00 0. 1. -< 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496701E+02 0.51504607748E+02 0.47000000000E+01 0. -1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997239E+02 0.23857997239E+02 0.00000000000E+00 0. 1. +< 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496682E+02 0.51504607743E+02 0.47000000000E+01 0. -1. 
--- -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997230E+02 0.23857997230E+02 0.00000000000E+00 0. -1. -> 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496701E+02 0.51504607748E+02 0.47000000000E+01 0. 1. +> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997239E+02 0.23857997239E+02 0.00000000000E+00 0. -1. +> 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496682E+02 0.51504607743E+02 0.47000000000E+01 0. 1. 9606,9619d9605 < 4 1 1E-03 0.1250139E+03 0.7546771E-02 0.1235066E+00 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.94948249861E+03 0.94948249861E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.41149990064E+01 0.41149990064E+01 0.00000000000E+00 0. -1. -< 5 1 1 2 501 0 -0.96459450317E+01 -0.34409175043E+02 0.83136584828E+02 0.90613560351E+02 0.47000000000E+01 0. -1. -< -5 1 1 2 0 501 0.96459450317E+01 0.34409175043E+02 0.86223091477E+03 0.86298393726E+03 0.47000000000E+01 0. 1. +< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.94948250004E+03 0.94948250004E+03 0.00000000000E+00 0. 1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.41149990002E+01 0.41149990002E+01 0.00000000000E+00 0. -1. +< 5 1 1 2 501 0 -0.96459450317E+01 -0.34409175043E+02 0.83136584965E+02 0.90613560477E+02 0.47000000000E+01 0. -1. +< -5 1 1 2 0 501 0.96459450317E+01 0.34409175043E+02 0.86223091608E+03 0.86298393857E+03 0.47000000000E+01 0. 1. 
< < 0 0.12501391E+03 diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index fb923ad0e1..291c38991b 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-05-16_21:02:01 +DATE: 2024-05-16_05:59:20 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9762s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9437s - [COUNTERS] Fortran MEs ( 1 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9570s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9073s + [COUNTERS] Fortran MEs ( 1 ) : 0.0498s for 8192 events => throughput is 1.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256290] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6290s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5966s - [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4201s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3715s + [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377502] fbridge_mode=0 + [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.7727s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4178s - [COUNTERS] Fortran MEs ( 1 ) : 0.3549s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8553s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3195s + [COUNTERS] Fortran MEs ( 1 ) : 0.5358s for 90112 events => throughput is 1.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,23 +124,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955975931003] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6634s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6288s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0346s for 8192 events => throughput is 2.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4604s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4161s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0443s for 8192 events => throughput is 1.85E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955975931003) differ by less than 2E-4 (2.364111328923002e-08) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955975930954) differ by less than 2E-4 (2.3641117063988304e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -158,23 +158,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895706383704] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.8210s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4405s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3805s for 90112 events => throughput is 2.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8327s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3534s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4793s for 90112 events => throughput is 1.88E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895706383704) differ by less than 2E-4 (2.2804433230660948e-08) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895706383660) differ by less than 2E-4 (2.2804427679545825e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -182,15 +182,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.380153e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.817766e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.394562e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.799752e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,23 +204,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955975931007] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6350s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6162s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0188s for 8192 events => throughput is 4.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4227s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3979s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.30E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955975931007) differ by less than 2E-4 (2.3641113511274625e-08) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955975930958) differ by less than 2E-4 (2.364111728603291e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -238,23 +238,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895706383709] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.6291s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4234s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2057s for 90112 events => throughput is 4.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6020s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3321s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2699s for 90112 events => throughput is 3.34E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895706383709) differ by less than 2E-4 (2.2804433452705553e-08) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895706383669) differ by less than 2E-4 (2.2804428123635034e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -262,15 +262,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.439993e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.208317e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.453301e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.242147e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,23 +284,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955953691122] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6162s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6054s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4018s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.35E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and cpp (2.0162955953691122) differ by less than 2E-4 (2.2538106270175717e-08) +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953691082) differ by less than 2E-4 (2.253811048902321e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -318,23 +318,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895701243891] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.5291s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4114s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1177s for 90112 events => throughput is 7.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3319s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1679s for 90112 events => throughput is 5.37E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and cpp (2.0434895701243891) differ by less than 2E-4 (2.255291176034291e-08) +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701243878) differ by less than 2E-4 (2.255290776354002e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -342,23 +342,177 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.910321e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.916130e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.911254e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4012s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3876s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.01E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953691082) differ by less than 2E-4 (2.253811048902321e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 + [UNWEIGHT] Wrote 1818 events (found 1823 events) + [COUNTERS] PROGRAM TOTAL : 1.4747s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3226s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1520s for 90112 events => throughput is 5.93E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701243878) differ by less than 2E-4 (2.255290776354002e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.307270e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.972724e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.162265e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4179s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3948s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.55E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953691082) differ by less than 2E-4 (2.253811048902321e-08) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 + [UNWEIGHT] Wrote 1818 events (found 1823 events) + [COUNTERS] PROGRAM TOTAL : 1.5949s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3393s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2556s for 90112 events => throughput is 3.53E+05 events/s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701243878) differ by less than 2E-4 (2.255290776354002e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.332654e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.349365e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,28 +524,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955503257880] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.9201s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9190s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 7.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8124s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8118s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.37E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256290) and hip (2.0162955503257880) differ by less than 2E-4 (1.9846257970357328e-10) +OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955503257827) differ by less than 2E-4 (1.9846613241725208e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -403,65 +557,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895242795763] fbridge_mode=1 + [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 4.7432s - [COUNTERS] Fortran Overhead ( 0 ) : 4.7321s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 90112 events => throughput is 8.11E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7514s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7444s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 90112 events => throughput is 1.29E+07 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377502) and hip (2.0434895242795763) differ by less than 2E-4 (1.1833978241782006e-10) +OK! 
xsec from fortran (2.0434895240377569) and cuda (2.0434895242795732) differ by less than 2E-4 (1.183348974365117e-10) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.302413e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.815756e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.958478e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.247313e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.540431e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.109600e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.493504e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.657132e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.548564e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.111219e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.486657e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.039865e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.512117e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.111200e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.031534e+07 ) 
sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.771983e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index f1b855b6c4..80269e77b1 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y - +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_21:03:04 +DATE: 2024-05-16_06:00:18 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 1.7341s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3493s - [COUNTERS] Fortran MEs ( 1 ) : 1.3848s for 8192 events => throughput is 5.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6807s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3392s + [COUNTERS] Fortran MEs ( 1 ) : 2.3415s for 8192 events => throughput is 3.50E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.6406s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2557s - [COUNTERS] Fortran MEs ( 1 ) : 1.3848s for 8192 events => throughput is 5.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6731s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3387s + [COUNTERS] Fortran MEs ( 1 ) : 2.3344s for 8192 events => throughput is 3.51E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.5982s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3360s - [COUNTERS] Fortran MEs ( 1 ) : 15.2622s for 90112 events => throughput is 5.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.7347s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8315s + [COUNTERS] Fortran MEs ( 1 ) : 25.9031s for 90112 events => throughput is 3.48E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084349E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 4.3730s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3359s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.0371s for 8192 events => throughput is 4.02E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.3198s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7672s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5526s for 8192 events => throughput is 3.21E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896697955084349E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 25.7765s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3566s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.4199s for 90112 events => throughput is 4.02E+03 events/s + [COUNTERS] PROGRAM TOTAL : 31.9550s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2173s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7377s for 90112 events => throughput is 3.25E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668083551438230E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438187E-007) differ by less than 3E-14 (5.551115123125783e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.087419e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.410579e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.086030e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.412797e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084317E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.0686s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1665s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9021s for 8192 events => throughput is 9.08E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.9225s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6149s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3076s for 8192 events => throughput is 6.27E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896697955084317E-007) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084412E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438272E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 12.1684s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2328s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.9355s for 90112 events => throughput is 9.07E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.4719s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0716s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.4004s for 90112 events => throughput is 6.26E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668083551438272E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438230E-007) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.295119e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.486190e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.298168e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.480385e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084359E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.0616s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6636s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3980s for 8192 events => throughput is 2.06E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4971s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9161s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5810s for 8192 events => throughput is 1.41E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896697955084359E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 6.0991s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7348s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3643s for 90112 events => throughput is 2.06E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.7948s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3698s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.4250s for 90112 events => throughput is 1.40E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668083551438261E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.119923e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.446474e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.453779e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 1.3443s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8339s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5104s for 8192 events => throughput is 1.60E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 8.0840s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3189s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7651s for 90112 events => throughput is 1.56E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.657111e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.109682e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.653778e+04 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 1.7114s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0187s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6926s for 8192 events => throughput is 1.18E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 10.1275s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5284s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.5992s for 90112 events => throughput is 1.19E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.225114e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.220944e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084359E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.7582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7082s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0500s for 8192 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8326s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8156s + [COUNTERS] CudaCpp MEs ( 
2 ) : 0.0171s for 8192 events => throughput is 4.80E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and hip (7.9896697955084359E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438272E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.2748s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7270s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5478s for 90112 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4753s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1885s for 90112 events => throughput is 4.78E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and hip (7.6668083551438272E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.667995e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.843300e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.660148e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.218089e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.214770e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.155842e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.754199e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.421655e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.210493e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.190444e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.219583e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.415946e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.211111e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.148629e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.134286e+05 ) sec^-1 +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.764175e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index ab8828dfcd..45b154f6da 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_21:04:57 +DATE: 2024-05-16_06:02:57 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 1.6428s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2578s - [COUNTERS] Fortran MEs ( 1 ) : 1.3850s for 8192 events => throughput is 5.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6694s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s + [COUNTERS] Fortran MEs ( 1 ) : 2.3294s for 8192 events => throughput is 3.52E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.6611s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2709s - [COUNTERS] Fortran MEs ( 1 ) : 1.3902s for 8192 events => throughput is 5.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3390s + [COUNTERS] Fortran MEs ( 1 ) : 2.3358s for 8192 events => throughput is 3.51E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.5795s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3451s - [COUNTERS] Fortran MEs ( 1 ) : 15.2345s for 90112 events => throughput is 5.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.4771s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8059s + [COUNTERS] Fortran MEs ( 1 ) : 25.6712s for 90112 events => throughput is 3.51E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896781657409323E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896785213255034E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 3.9914s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1011s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8903s for 8192 events => throughput is 4.33E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0871s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6795s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4076s for 8192 events => throughput is 3.40E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896781657409323E-007) differ by less than 4E-4 (1.0476318432761644e-06) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896785213255034E-007) differ by less than 4E-4 (1.0921373827521563e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668135917139758E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668138359550833E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 23.8342s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1725s - [COUNTERS] CudaCpp MEs ( 2 ) : 20.6617s for 90112 events => throughput is 4.36E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.6384s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1272s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.5112s for 90112 events => throughput is 3.40E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668135917139758E-007) differ by less than 4E-4 (6.830182661676787e-07) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668138359550833E-007) differ by less than 4E-4 (7.148752136920677e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.506422e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.518646e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.499843e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.519140e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896768403674554E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.2200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7417s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4783s for 8192 events => throughput is 1.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.6928s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0131s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6797s for 8192 events => throughput is 1.21E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896768403674554E-007) differ by less than 4E-4 (8.817459542509454e-07) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896766542858863E-007) differ by less than 4E-4 (8.584556829838164e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668124704275563E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.0708s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8105s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.2603s for 90112 events => throughput is 1.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.9205s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4645s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.4560s for 90112 events => throughput is 1.21E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668124704275563E-007) differ by less than 4E-4 (5.367662185840061e-07) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668121906848987E-007) differ by less than 4E-4 (5.002787206720427e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.747376e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.232964e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.759961e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.231409e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,22 +276,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896764767194441E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.6792s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4730s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2062s for 8192 events => throughput is 3.97E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9294s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6306s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2988s for 8192 events => throughput is 2.74E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896764767194441E-007) differ by less than 4E-4 (8.362311807452727e-07) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896764408326359E-007) differ by less than 4E-4 (8.31739528805997e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,44 +309,190 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668127564727394E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 3.8141s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5466s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2675s for 90112 events => throughput is 3.97E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3691s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0900s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.2791s for 90112 events => throughput is 2.75E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668127564727394E-007) differ by less than 4E-4 (5.740757704764121e-07) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007) differ by less than 4E-4 (5.380134884269694e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.133308e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.797581e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.799365e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 0.8582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5958s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2624s for 8192 events => throughput is 3.12E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896764408326359E-007) differ by less than 4E-4 (8.31739528805997e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 4.9443s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0456s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8987s for 90112 events => throughput is 3.11E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007) differ by less than 4E-4 (5.380134884269694e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.232867e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.111803e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.231460e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 1.0285s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3457s for 8192 events => throughput is 2.37E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896778056937195E-007) differ by less than 4E-4 (1.0025677505964836e-06) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 5.9990s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1683s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.8307s for 90112 events => throughput is 2.35E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668139178203571E-007) differ by less than 4E-4 (7.255530953820255e-07) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.399591e+04 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.392653e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896777191982386E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896805369365078E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.6713s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6454s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0259s for 8192 events => throughput is 3.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8136s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.86E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and hip (7.9896777191982386E-007) differ by less than 4E-4 (9.917418370974929e-07) +OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896805369365078E-007) differ by less than 4E-4 (1.3444145174901223e-06) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668141007936531E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668194616292154E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 1.9564s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6723s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2841s for 90112 events => throughput is 3.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4309s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2769s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1540s for 90112 events => throughput is 5.85E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and hip (7.6668141007936531E-007) differ by less than 4E-4 (7.494187359569082e-07) +OK! 
xsec from fortran (7.6668083551438230E-007) and cuda (7.6668194616292154E-007) differ by less than 4E-4 (1.4486452351025747e-06) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.255455e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.229528e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.207584e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.512458e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.176201e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.376887e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.009662e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.385341e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.174315e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.353397e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.977217e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.396782e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.171851e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.372639e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.667254e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.860961e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 739e2ff231..66daeb0e97 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-05-16_21:06:36 +DATE: 2024-05-16_06:05:05 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 1.6435s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2582s - [COUNTERS] Fortran MEs ( 1 ) : 1.3854s for 8192 events => throughput is 5.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7059s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3465s + [COUNTERS] Fortran MEs ( 1 ) : 2.3593s for 8192 events => throughput is 3.47E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084370E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.6438s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2579s - [COUNTERS] Fortran MEs ( 1 ) : 1.3859s for 8192 events => throughput is 5.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7035s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s + [COUNTERS] Fortran MEs ( 1 ) : 2.3597s for 8192 events => throughput is 3.47E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438261E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.5842s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3403s - [COUNTERS] Fortran MEs ( 1 ) : 15.2439s for 90112 events => throughput is 5.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.5770s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8193s + [COUNTERS] Fortran MEs ( 1 ) : 25.7577s for 90112 events => throughput is 3.50E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,22 +124,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696375074458E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 4.3380s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2885s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.0494s for 8192 events => throughput is 4.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.3005s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7718s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5286s for 8192 events => throughput is 3.24E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896696375074458E-007) differ by less than 2E-4 (1.9775659776399834e-08) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696375074447E-007) differ by less than 2E-4 (1.9775660775600556e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -157,36 +157,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668081976882384E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 25.9227s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3599s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.5629s for 90112 events => throughput is 3.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 31.9857s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2191s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7666s for 90112 events => throughput is 3.25E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668081976882384E-007) differ by less than 2E-4 (2.0537305855938826e-08) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081976882373E-007) differ by less than 2E-4 (2.0537305522871918e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.082539e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.386546e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.089958e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.397002e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -200,22 +200,22 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696285825699E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.0479s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1518s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8960s for 8192 events => throughput is 9.14E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.8744s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5934s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2810s for 8192 events => throughput is 6.40E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896696285825699E-007) differ by less than 2E-4 (2.0892711671827158e-08) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696285825688E-007) differ by less than 2E-4 (2.089271267102788e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -233,36 +233,36 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668081890954439E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 12.0879s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2246s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.8633s for 90112 events => throughput is 9.14E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.1795s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0414s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.1381s for 90112 events => throughput is 6.37E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668081890954439E-007) differ by less than 2E-4 (2.1658084325970606e-08) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081890954375E-007) differ by less than 2E-4 (2.1658084770059816e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.370714e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.678262e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.376189e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.743588e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -276,8 +276,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -285,13 +285,13 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.0792s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6832s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3961s for 8192 events => throughput is 2.07E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9015s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5764s for 8192 events => throughput is 1.42E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.912112224111695e-08) +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -309,8 +309,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -318,35 +318,181 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 6.0941s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7388s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3553s for 90112 events => throughput is 2.07E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.7035s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3540s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.3495s for 90112 events => throughput is 1.42E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004716071297e-08) +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.123477e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.454724e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.476512e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 1.3378s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8314s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5064s for 8192 events => throughput is 1.62E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 7.8877s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3042s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5835s for 90112 events => throughput is 1.61E+04 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.679497e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.126374e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.670221e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 215 events (found 963 events) + [COUNTERS] PROGRAM TOTAL : 1.7201s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0244s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6957s for 8192 events => throughput is 1.18E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1700 events (found 1705 events) + [COUNTERS] PROGRAM TOTAL : 10.1095s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4855s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.6240s for 90112 events => throughput is 1.18E+04 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.204025e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.203510e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -358,28 +504,28 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697918297697E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.7763s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7263s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0500s for 8192 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8365s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8192s + [COUNTERS] CudaCpp MEs ( 
2 ) : 0.0172s for 8192 events => throughput is 4.75E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084370E-007) and hip (7.9896697918297697E-007) differ by less than 2E-4 (4.604279180142612e-10) +OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697918297644E-007) differ by less than 2E-4 (4.6042958334879813e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -391,65 +537,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551547613E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.2813s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7304s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5509s for 90112 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4662s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2768s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1894s for 90112 events => throughput is 4.76E+05 events/s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438261E-007) and hip (7.6668083551547613E-007) differ by less than 2E-4 (1.426192497433476e-12) +OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551547592E-007) differ by less than 2E-4 (1.4264145420384011e-12) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.662460e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.814879e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.656556e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.185918e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.210836e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.154361e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.749518e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.382253e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.207483e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.160102e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.216328e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.387193e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.201399e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.108981e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.131995e+05 ) sec^-1 +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.750323e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 8b8d5ea27e..059122dda6 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_21:02:53 +DATE: 2024-05-16_06:00:05 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4498s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4439s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4148s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4054s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.2466s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2407s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3154s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3059s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.4762 [0.47620722822826017] fbridge_mode=0 + [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.0920s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0296s - [COUNTERS] Fortran MEs ( 1 ) : 0.0623s for 90112 events => throughput is 1.45E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3773s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2771s + [COUNTERS] Fortran MEs ( 1 ) : 0.1002s for 90112 events => throughput is 8.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,15 +124,15 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] ERROR! 
No cross section in log file: - /tmp/valassia/output_susyggt1t1_x1_cudacpp + /tmp/avalassi/output_susyggt1t1_x1_cudacpp ... xqcutij # 3> 0.0 0.0 RESET CUMULATIVE VARIABLE diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 6b80cd42f0..01167da954 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_21:02:57 +DATE: 2024-05-16_06:00:09 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.3127s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3069s - [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4095s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4004s + [COUNTERS] Fortran MEs ( 1 ) : 0.0092s for 8192 events => throughput is 8.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.2478s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2419s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3199s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3104s + [COUNTERS] Fortran MEs ( 1 ) : 0.0095s for 8192 events => throughput is 8.61E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.4762 [0.47620722822826017] fbridge_mode=0 + [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.0950s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0319s - [COUNTERS] Fortran MEs ( 1 ) : 0.0631s for 90112 events => throughput is 1.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3992s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2973s + [COUNTERS] Fortran MEs ( 1 ) : 0.1018s for 90112 events => throughput is 8.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,15 +124,15 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] ERROR! 
No cross section in log file: - /tmp/valassia/output_susyggt1t1_x1_cudacpp + /tmp/avalassi/output_susyggt1t1_x1_cudacpp ... xqcutij # 3> 0.0 0.0 RESET CUMULATIVE VARIABLE diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index a73952ea72..6c876298cd 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-05-16_21:03:01 +DATE: 2024-05-16_06:00:14 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.3085s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3027s - [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4113s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4020s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.2493s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2435s - [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3138s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3043s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.67E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.4762 [0.47620722822826017] fbridge_mode=0 + [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.0959s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0335s - [COUNTERS] Fortran MEs ( 1 ) : 0.0623s for 90112 events => throughput is 1.45E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3758s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2758s + [COUNTERS] Fortran MEs ( 1 ) : 0.1001s for 90112 events => throughput is 9.01E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,15 +124,15 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] ERROR! 
No cross section in log file: - /tmp/valassia/output_susyggt1t1_x1_cudacpp + /tmp/avalassi/output_susyggt1t1_x1_cudacpp ... xqcutij # 3> 0.0 0.0 RESET CUMULATIVE VARIABLE diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 44d76f3e44..fd24a61552 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_21:02:38 +DATE: 2024-05-16_05:59:47 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.6960s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6674s - [COUNTERS] Fortran MEs ( 1 ) : 0.0287s for 8192 events => throughput is 2.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8237s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7798s + [COUNTERS] Fortran MEs ( 1 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3170s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2884s - [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4191s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s + [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=0 + [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3407s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0288s - [COUNTERS] Fortran MEs ( 1 ) : 0.3120s for 90112 events => throughput is 2.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8130s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3278s + [COUNTERS] Fortran MEs ( 1 ) : 0.4852s for 90112 events => throughput is 1.86E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,19 +124,19 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 171.8 [171.81273026311092] fbridge_mode=1 + [XSECTION] Cross section = 171.8 [171.81273026311101] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.5425s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5103s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0322s for 8192 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7007s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0395s for 8192 events => throughput is 2.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (44.598860065419849) and cpp (171.81273026311092) differ by more than 3E-14 (2.8524018329411867) +ERROR! 
xsec from fortran (44.598860065419856) and cpp (171.81273026311101) differ by more than 3E-14 (2.852401832941188) diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 792d5133aa..293718b73f 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_21:02:43 +DATE: 2024-05-16_05:59:53 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.5882s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5595s - [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8342s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7896s + [COUNTERS] Fortran MEs ( 1 ) : 0.0446s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3184s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2899s - [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4178s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3733s + [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=0 + [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3414s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0272s - [COUNTERS] Fortran MEs ( 1 ) : 0.3142s for 90112 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8125s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3299s + [COUNTERS] Fortran MEs ( 1 ) : 0.4826s for 90112 events => throughput is 1.87E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,19 +124,19 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 171.8 [171.81269679287095] fbridge_mode=1 + [XSECTION] Cross section = 171.8 [171.81270286137041] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.5126s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4850s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0276s for 8192 events => throughput is 2.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7025s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6657s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0367s for 8192 events => throughput is 2.23E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (44.598860065419849) and cpp (171.81269679287095) differ by more than 4E-4 (2.8524010824681945) +ERROR! 
xsec from fortran (44.598860065419856) and cpp (171.81270286137041) differ by more than 4E-4 (2.8524012185366816) diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index c8837e947f..f9ac9cdc3d 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering 
directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-05-16_21:02:48 +DATE: 2024-05-16_05:59:59 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.5881s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5596s - [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8401s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7957s + [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=0 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3180s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2895s - [COUNTERS] Fortran MEs ( 1 ) : 0.0286s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4268s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3820s + [COUNTERS] Fortran MEs ( 1 ) : 0.0448s for 8192 events => throughput is 1.83E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=0 + [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3405s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0289s - [COUNTERS] Fortran MEs ( 1 ) : 0.3116s for 90112 events => throughput is 2.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8476s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3594s + [COUNTERS] Fortran MEs ( 1 ) : 0.4881s for 90112 events => throughput is 1.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,19 +124,19 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 171.8 [171.81273490068895] fbridge_mode=1 + [XSECTION] Cross section = 171.8 [171.81273490068889] fbridge_mode=1 [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.5331s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5001s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7032s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6624s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (44.598860065419849) and cpp (171.81273490068895) differ by more than 2E-4 (2.8524019369254145) +ERROR! 
xsec from fortran (44.598860065419856) and cpp (171.81273490068889) differ by more than 2E-4 (2.8524019369254128) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 62f5a1747b..8f9275e4c9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:54:35 +DATE: 2024-05-16_14:32:11 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.192740e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.108907e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.336897e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.552555 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.832141e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.963737e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.194654e+08 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.663591 sec INFO: No Floating Point Exceptions have been reported - 1,305,552,637 cycles:u # 2.392 GHz (74.72%) - 2,182,824 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.11%) - 5,346,063 stalled-cycles-backend:u # 0.41% backend cycles idle (76.06%) - 2,102,532,654 instructions:u # 1.61 insn per cycle - # 0.00 stalled cycles per insn (75.80%) - 0.627941411 seconds time elapsed + 2,552,888,166 cycles # 2.852 GHz + 3,988,214,096 instructions # 1.56 insn per cycle + 0.955666761 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.209249e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.380707e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.380707e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.951508 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.053499e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.235255e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235255e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.389071 sec INFO: No Floating Point Exceptions have been reported - 19,608,150,481 cycles:u # 3.291 GHz (74.93%) - 51,216,372 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.98%) - 54,836,990 stalled-cycles-backend:u # 0.28% backend cycles idle (75.03%) - 46,950,513,606 instructions:u # 2.39 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 5.966260573 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) + 18,286,976,651 cycles # 2.860 GHz + 43,966,894,713 instructions # 2.40 insn per cycle + 6.394304827 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.889128e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.377852e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.377852e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 4.063095 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.575090e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.062276e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.062276e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.417510 sec INFO: No Floating Point Exceptions have been reported - 13,176,650,260 cycles:u # 3.235 GHz (74.81%) - 52,164,290 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.97%) - 678,636,146 stalled-cycles-backend:u # 5.15% backend cycles idle (75.06%) - 31,192,438,300 instructions:u # 2.37 insn per cycle - # 0.02 stalled cycles per insn (75.06%) - 4.077189517 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) + 12,744,894,916 cycles # 2.883 GHz + 31,001,019,523 instructions # 2.43 insn per cycle + 4.422588286 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.592796e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.443978e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.443978e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.173904 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.946862e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.717949e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.717949e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.650052 sec INFO: No Floating Point Exceptions have been reported - 10,145,365,093 cycles:u # 3.186 GHz (74.89%) - 51,976,186 stalled-cycles-frontend:u # 0.51% frontend cycles idle (74.89%) - 1,275,348,320 stalled-cycles-backend:u # 12.57% backend cycles idle (74.97%) - 19,402,420,759 instructions:u # 1.91 insn per cycle - # 0.07 stalled cycles per insn (75.10%) - 3.187813666 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) + 10,049,299,034 cycles # 2.750 GHz + 19,366,983,583 instructions # 1.93 insn per cycle + 3.655131055 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.022084e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.853820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.853820e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 
3.526182 sec +INFO: No Floating Point Exceptions have been reported + 9,727,313,452 cycles # 2.755 GHz + 18,976,774,064 instructions # 1.95 insn per cycle + 3.531366474 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.695166e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.233065e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.233065e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.130967 sec +INFO: No Floating Point Exceptions have been reported + 8,580,931,991 cycles # 2.075 GHz + 15,727,945,386 instructions # 1.83 insn per cycle + 4.136130895 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index a1f621c196..556a164c58 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,182 +1,231 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_16:26:26 +DATE: 2024-05-16_14:59:51 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.452763e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.436216e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.436216e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.562380 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.482485e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.592798e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.592798e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.321030 sec INFO: No Floating Point Exceptions have been reported - 18,356,696,438 cycles:u # 3.283 GHz (74.97%) - 218,525,993 stalled-cycles-frontend:u # 1.19% frontend cycles idle (74.97%) - 6,906,420,713 stalled-cycles-backend:u # 37.62% backend cycles idle (74.89%) - 17,048,626,162 instructions:u # 0.93 insn per cycle - # 0.41 stalled cycles per insn (75.01%) - 5.622622682 seconds time elapsed + 7,329,722,152 cycles # 2.840 GHz + 13,178,162,400 instructions # 1.80 insn per cycle + 2.637544426 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.218933e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.388755e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.388755e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.996185 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.015910e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.185158e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185158e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.819356 sec INFO: No Floating Point Exceptions have been reported - 20,008,740,019 cycles:u # 3.326 GHz (75.00%) - 51,656,692 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.94%) - 118,408,767 stalled-cycles-backend:u # 0.59% backend cycles idle (74.94%) - 47,242,044,176 instructions:u # 2.36 insn per cycle - # 0.00 stalled cycles per insn (75.00%) - 6.019345525 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) + 19,469,024,508 cycles # 2.853 GHz + 44,194,459,972 instructions # 2.27 insn per cycle + 6.826135735 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.860775e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.335297e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.335297e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.250665 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.484528e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.911785e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.911785e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.870031 sec INFO: No Floating Point Exceptions have been reported - 13,907,967,553 cycles:u # 3.258 GHz (74.91%) - 53,859,439 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.89%) - 799,802,404 stalled-cycles-backend:u # 5.75% backend cycles idle (74.97%) - 31,962,126,402 instructions:u # 2.30 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 4.272203950 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) + 13,928,289,994 cycles # 2.857 GHz + 31,840,505,402 instructions # 2.29 insn per cycle + 4.876819018 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.476048e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.231168e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.231168e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.423761 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.770728e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.386706e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.386706e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.194561 sec INFO: No Floating Point Exceptions have been reported - 10,850,734,830 cycles:u # 3.152 GHz (74.91%) - 49,643,502 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.91%) - 1,327,332,054 stalled-cycles-backend:u # 12.23% backend cycles idle (75.03%) - 20,560,922,677 instructions:u # 1.89 insn per cycle - # 0.06 stalled cycles per insn (75.14%) - 3.446530381 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) + 11,361,909,372 cycles # 2.705 GHz + 20,728,193,515 instructions # 1.82 insn per cycle + 4.201564491 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.837895e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.511052e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.511052e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.060412 sec +INFO: No Floating Point Exceptions have been reported + 10,967,372,142 cycles # 2.697 GHz + 20,348,024,135 instructions # 1.86 insn per cycle + 4.067336299 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.574771e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.036722e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.036722e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.642745 sec +INFO: No Floating Point Exceptions have been reported + 9,876,735,144 cycles # 2.125 GHz + 16,873,564,045 instructions # 1.71 insn per cycle + 4.649693422 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 62a102726d..752636bf13 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_16:33:35 +DATE: 2024-05-16_15:10:56 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.296438e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.102193e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.329275e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.514734e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.592124e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.118434e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 
4.838234 sec +TOTAL : 1.385277 sec INFO: No Floating Point Exceptions have been reported - 15,394,934,549 cycles:u # 3.175 GHz (74.78%) - 154,237,641 stalled-cycles-frontend:u # 1.00% frontend cycles idle (74.96%) - 6,875,767,005 stalled-cycles-backend:u # 44.66% backend cycles idle (75.09%) - 11,503,925,750 instructions:u # 0.75 insn per cycle - # 0.60 stalled cycles per insn (75.09%) - 4.891696810 seconds time elapsed + 4,585,820,337 cycles # 2.836 GHz + 7,177,605,134 instructions # 1.57 insn per cycle + 1.675534023 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.205745e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.375909e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.375909e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.053610e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.236408e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236408e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.967860 sec +TOTAL : 6.764205 sec INFO: No Floating Point Exceptions have been reported - 19,597,182,675 cycles:u # 3.278 GHz (74.98%) - 51,611,081 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.98%) - 52,432,484 stalled-cycles-backend:u # 0.27% backend cycles idle (74.98%) - 47,079,710,649 instructions:u # 2.40 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 5.979908816 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) + 19,353,708,257 cycles # 2.859 GHz + 44,070,957,602 instructions # 2.28 insn per cycle + 6.769682162 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.876605e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.377107e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.377107e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.538542e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.015186e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.015186e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.093970 sec +TOTAL : 4.882143 sec INFO: No Floating Point Exceptions have been reported - 13,286,523,847 cycles:u # 3.237 GHz (74.92%) - 53,103,722 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.02%) - 696,597,262 stalled-cycles-backend:u # 5.24% backend cycles idle (75.05%) - 31,069,991,906 instructions:u # 2.34 insn per cycle - # 0.02 stalled cycles per insn (75.05%) - 4.105527074 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) + 13,858,733,837 cycles # 2.836 GHz + 31,001,638,282 instructions # 2.24 insn per cycle + 4.887574523 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.592189e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.449037e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.449037e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.916460e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.668694e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.668694e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.167352 sec +TOTAL : 4.073472 sec INFO: No Floating Point Exceptions have been reported - 10,175,950,003 cycles:u # 3.202 GHz (74.93%) - 52,112,697 stalled-cycles-frontend:u # 0.51% frontend cycles idle (75.05%) - 1,283,978,771 stalled-cycles-backend:u # 12.62% backend cycles idle (75.08%) - 19,503,807,353 instructions:u # 1.92 insn per cycle - # 0.07 stalled cycles per insn (75.08%) - 3.179566398 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) + 11,156,313,503 cycles # 2.736 GHz + 19,267,334,271 instructions # 1.73 insn per cycle + 4.078862770 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.001009e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.815270e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.815270e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL 
: 3.939832 sec +INFO: No Floating Point Exceptions have been reported + 10,815,664,939 cycles # 2.742 GHz + 18,691,798,772 instructions # 1.73 insn per cycle + 3.945208768 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.659550e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.188571e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.188571e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.600299 sec +INFO: No Floating Point Exceptions have been reported + 9,711,022,403 cycles # 2.112 GHz + 15,432,876,214 instructions # 1.59 insn per cycle + 4.605929662 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 6b8c442cf3..8320028620 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,176 +1,220 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_16:31:23 +DATE: 2024-05-16_15:05:29 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.420460e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.083294e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.310383e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.495814 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.845001e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.545626e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.012755e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.945484 sec INFO: No Floating Point Exceptions have been reported - 17,943,418,670 cycles:u # 3.271 GHz (74.94%) - 220,924,204 stalled-cycles-frontend:u # 1.23% frontend cycles idle (74.98%) - 6,860,803,865 stalled-cycles-backend:u # 38.24% backend cycles idle (75.06%) - 16,753,890,275 instructions:u # 0.93 insn per cycle - # 0.41 stalled cycles per insn (75.00%) - 5.547810485 seconds time elapsed + 6,182,952,030 cycles # 2.840 GHz + 11,472,065,600 instructions # 1.86 insn per cycle + 2.234065267 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.183732e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.346918e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.346918e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.088321 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.053191e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.234940e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234940e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.394281 sec INFO: No Floating Point Exceptions have been reported - 19,581,171,404 cycles:u # 3.210 GHz (74.96%) - 50,180,129 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.96%) - 47,121,604 stalled-cycles-backend:u # 0.24% backend cycles idle (74.96%) - 46,989,064,995 instructions:u # 2.40 insn per cycle - # 0.00 stalled cycles per insn (74.97%) - 6.101947004 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) + 18,294,474,856 cycles # 2.859 GHz + 43,971,000,114 instructions # 2.40 insn per cycle + 6.399562206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.867754e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.359860e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.359860e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.119915 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.557791e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.040989e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.040989e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.463787 sec INFO: No Floating Point Exceptions have been reported - 13,263,190,140 cycles:u # 3.210 GHz (74.95%) - 51,197,022 stalled-cycles-frontend:u # 0.39% frontend cycles idle (75.03%) - 820,166,823 stalled-cycles-backend:u # 6.18% backend cycles idle (75.03%) - 31,165,784,636 instructions:u # 2.35 insn per cycle - # 0.03 stalled cycles per insn (75.03%) - 4.134555430 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1626) (avx2: 0) (512y: 0) (512z: 0) + 12,772,778,524 cycles # 2.859 GHz + 30,998,712,334 instructions # 2.43 insn per cycle + 4.469196075 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.630821e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.488220e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.488220e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.129720 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.925684e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.680642e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.680642e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.688587 sec INFO: No Floating Point Exceptions have been reported - 10,175,967,529 cycles:u # 3.239 GHz (74.89%) - 52,037,851 stalled-cycles-frontend:u # 0.51% frontend cycles idle (75.01%) - 1,289,035,436 stalled-cycles-backend:u # 12.67% backend cycles idle (75.05%) - 19,120,632,315 instructions:u # 1.88 insn per cycle - # 0.07 stalled cycles per insn (75.05%) - 3.143235076 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 0) (512z: 0) + 10,072,112,495 cycles # 2.727 GHz + 19,365,616,714 instructions # 1.92 insn per cycle + 3.694022814 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1966) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.999182e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.825411e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.825411e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL 
: 3.565445 sec +INFO: No Floating Point Exceptions have been reported + 9,737,532,909 cycles # 2.728 GHz + 18,976,607,709 instructions # 1.95 insn per cycle + 3.570830090 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1690) (512y: 181) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.671566e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.197159e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.197159e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.185653 sec +INFO: No Floating Point Exceptions have been reported + 8,570,262,444 cycles # 2.045 GHz + 15,727,819,138 instructions # 1.84 insn per cycle + 4.191109662 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 901) (512y: 154) (512z: 1258) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index e28e3db96a..6aa3de3ecf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand 
-Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:54:55 +DATE: 2024-05-16_14:32:42 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.849422e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.590513e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.911390e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.510534 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.832072e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.951586e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.177922e+08 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.666506 sec INFO: No Floating Point Exceptions have been reported - 1,226,528,626 cycles:u # 2.377 GHz (73.52%) - 2,221,159 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.57%) - 5,251,388 stalled-cycles-backend:u # 0.43% backend cycles idle (75.22%) - 2,084,638,085 instructions:u # 1.70 insn per cycle - # 0.00 stalled cycles per insn (75.35%) - 0.567128402 seconds time elapsed + 2,570,661,306 cycles # 2.841 GHz + 3,994,547,928 instructions # 1.55 insn per cycle + 0.967516454 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165216E-002 -Relative difference = 1.0277079305077159e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.294661e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.490725e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.490725e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.604903 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.115974e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322257e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322257e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.054676 sec INFO: No Floating Point Exceptions have been reported - 18,501,031,339 cycles:u # 3.294 GHz (74.96%) - 51,332,716 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.94%) - 844,407,865 stalled-cycles-backend:u # 4.56% backend cycles idle (74.94%) - 44,830,087,800 instructions:u # 2.42 insn per cycle - # 0.02 stalled cycles per insn (74.97%) - 5.619831578 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 486) (avx2: 0) (512y: 0) (512z: 0) + 17,515,565,744 cycles # 2.891 GHz + 41,813,477,100 instructions # 2.39 insn per cycle + 6.059803806 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.977593e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.507832e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.507832e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.909334 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.620797e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.138518e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.138518e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.301844 sec INFO: No Floating Point Exceptions have been reported - 12,867,611,585 cycles:u # 3.283 GHz (74.92%) - 49,300,942 stalled-cycles-frontend:u # 0.38% frontend cycles idle (74.91%) - 1,659,477,790 stalled-cycles-backend:u # 12.90% backend cycles idle (74.93%) - 30,159,312,555 instructions:u # 2.34 insn per cycle - # 0.06 stalled cycles per insn (75.03%) - 3.924169057 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1569) (avx2: 0) (512y: 0) (512z: 0) + 12,450,766,554 cycles # 2.891 GHz + 30,161,114,565 instructions # 2.42 insn per cycle + 4.307292943 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1612) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.544143e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.365618e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.365618e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.213304 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.953305e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.731201e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.731201e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.641578 sec INFO: No Floating Point Exceptions have been reported - 10,293,286,723 cycles:u # 3.193 GHz (74.95%) - 50,260,134 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.94%) - 334,691,875 stalled-cycles-backend:u # 3.25% backend cycles idle (74.95%) - 18,981,075,658 instructions:u # 1.84 insn per cycle - # 0.02 stalled cycles per insn (74.95%) - 3.227163156 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1902) (512y: 0) (512z: 0) + 9,958,194,708 cycles # 2.732 GHz + 19,097,340,022 instructions # 1.92 insn per cycle + 3.646748223 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1931) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.030144e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.871070e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.871070e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.516336 sec +INFO: No Floating Point Exceptions have been reported + 9,615,329,857 cycles # 2.731 
GHz + 18,757,197,169 instructions # 1.95 insn per cycle + 3.521486960 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1662) (512y: 178) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.716452e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.276447e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.276447e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 
+TOTAL : 4.085042 sec +INFO: No Floating Point Exceptions have been reported + 8,419,965,935 cycles # 2.059 GHz + 15,604,092,420 instructions # 1.85 insn per cycle + 4.090208290 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 887) (512y: 156) (512z: 1239) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 7c1de5f530..b8b45776b1 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_16:14:10 +DATE: 2024-05-16_14:50:17 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.180573e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.087080e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.343874e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.510647 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.702175e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.710921e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.156854e+08 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.694390 sec INFO: No Floating Point Exceptions have been reported - 1,317,606,045 cycles:u # 2.555 GHz (75.14%) - 2,106,656 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.24%) - 5,011,590 stalled-cycles-backend:u # 0.38% backend cycles idle (75.33%) - 2,061,741,334 instructions:u # 1.56 insn per cycle - # 0.00 stalled cycles per insn (75.40%) - 0.562010535 seconds time elapsed + 2,605,646,002 cycles # 2.805 GHz + 4,054,878,448 instructions # 1.56 insn per cycle + 0.990293563 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.790157e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.173751e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.173751e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.241298 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.570616e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.013848e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.013848e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.427060 sec INFO: No Floating Point Exceptions have been reported - 14,122,804,467 cycles:u # 3.322 GHz (75.02%) - 52,780,179 stalled-cycles-frontend:u # 0.37% frontend cycles idle (74.98%) - 742,554,443 stalled-cycles-backend:u # 5.26% backend cycles idle (74.98%) - 36,765,556,155 instructions:u # 2.60 insn per cycle - # 0.02 stalled cycles per insn (74.98%) - 4.255166528 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) + 12,653,335,495 cycles # 2.855 GHz + 32,508,582,789 instructions # 2.57 insn per cycle + 4.432506316 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.354973e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.172546e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.172546e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.404789 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.997942e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.867798e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.867798e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.572733 sec INFO: No Floating Point Exceptions have been reported - 10,939,804,848 cycles:u # 3.203 GHz (74.96%) - 52,338,890 stalled-cycles-frontend:u # 0.48% frontend cycles idle (74.94%) - 156,607,813 stalled-cycles-backend:u # 1.43% backend cycles idle (74.95%) - 24,783,195,117 instructions:u # 2.27 insn per cycle - # 0.01 stalled cycles per insn (74.95%) - 3.419320010 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2334) (avx2: 0) (512y: 0) (512z: 0) + 10,224,022,422 cycles # 2.858 GHz + 24,474,305,392 instructions # 2.39 insn per cycle + 3.578147466 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.029032e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.216078e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.216078e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.801238 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.166030e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.174429e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.174429e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.325864 sec INFO: No Floating Point Exceptions have been reported - 9,086,876,579 cycles:u # 3.231 GHz (74.97%) - 49,992,624 stalled-cycles-frontend:u # 0.55% frontend cycles idle (74.97%) - 136,988,259 stalled-cycles-backend:u # 1.51% backend cycles idle (74.98%) - 16,857,366,216 instructions:u # 1.86 insn per cycle - # 0.01 stalled cycles per insn (74.98%) - 2.832994199 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1604) (512y: 0) (512z: 0) + 9,098,194,590 cycles # 2.732 GHz + 16,922,780,551 instructions # 1.86 insn per cycle + 3.331363940 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.212975e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.265291e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.265291e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 
3.263243 sec +INFO: No Floating Point Exceptions have been reported + 8,899,581,855 cycles # 2.723 GHz + 16,332,700,862 instructions # 1.84 insn per cycle + 3.268811314 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.881513e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.573087e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.573087e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.766130 sec +INFO: No Floating Point Exceptions have been reported + 7,870,004,063 cycles # 2.087 GHz + 14,582,523,760 instructions # 1.85 insn per cycle + 3.771527980 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 7bd614f453..36ca3a055a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand 
-Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_16:14:26 +DATE: 2024-05-16_14:50:44 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.651323e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.584556e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.905916e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.514513 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.703728e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.728666e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.213805e+08 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.683308 sec INFO: No Floating Point Exceptions have been reported - 1,280,503,839 cycles:u # 2.435 GHz (73.24%) - 2,277,448 stalled-cycles-frontend:u # 0.18% frontend cycles idle (73.70%) - 5,137,824 stalled-cycles-backend:u # 0.40% backend cycles idle (75.39%) - 2,023,880,849 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (75.82%) - 0.570351573 seconds time elapsed + 2,600,218,993 cycles # 2.827 GHz + 4,020,842,023 instructions # 1.55 insn per cycle + 0.979103636 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165216E-002 -Relative difference = 1.0277079305077159e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.400394e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.175393e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.175393e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.357588 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.089739e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.950650e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.950650e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.429960 sec INFO: No Floating Point Exceptions have been reported - 10,773,216,593 cycles:u # 3.199 GHz (74.90%) - 50,692,552 stalled-cycles-frontend:u # 0.47% frontend cycles idle (75.02%) - 49,096,202 stalled-cycles-backend:u # 0.46% backend cycles idle (75.06%) - 28,434,930,563 instructions:u # 2.64 insn per cycle - # 0.00 stalled cycles per insn (75.06%) - 3.373586068 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) + 9,811,818,087 cycles # 2.857 GHz + 25,388,363,151 instructions # 2.59 insn per cycle + 3.435408237 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.618000e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.644001e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.644001e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.133129 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.308707e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.576242e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.576242e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.150608 sec INFO: No Floating Point Exceptions have been reported - 10,198,671,312 cycles:u # 3.245 GHz (74.90%) - 57,108,535 stalled-cycles-frontend:u # 0.56% frontend cycles idle (74.91%) - 61,941,326 stalled-cycles-backend:u # 0.61% backend cycles idle (75.03%) - 21,473,197,144 instructions:u # 2.11 insn per cycle - # 0.00 stalled cycles per insn (75.06%) - 3.174997823 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2117) (avx2: 0) (512y: 0) (512z: 0) + 9,017,040,298 cycles # 2.858 GHz + 21,483,572,468 instructions # 2.38 insn per cycle + 3.156151233 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.243324e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.693698e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.693698e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 2.677364 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.317514e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.502934e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.502934e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.137476 sec INFO: No Floating Point Exceptions have been reported - 8,473,678,842 cycles:u # 3.153 GHz (75.00%) - 49,686,047 stalled-cycles-frontend:u # 0.59% frontend cycles idle (75.00%) - 65,234,637 stalled-cycles-backend:u # 0.77% backend cycles idle (75.00%) - 15,802,097,675 instructions:u # 1.86 insn per cycle - # 0.00 stalled cycles per insn (75.00%) - 2.690233127 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1497) (512y: 0) (512z: 0) + 8,591,075,632 cycles # 2.734 GHz + 15,811,134,800 instructions # 1.84 insn per cycle + 3.143116597 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165084E-002 -Relative difference = 1.0277089582483854e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.364262e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.604788e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.604788e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 
3.081738 sec +INFO: No Floating Point Exceptions have been reported + 8,442,051,612 cycles # 2.735 GHz + 15,504,513,991 instructions # 1.84 insn per cycle + 3.087247738 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.995819e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.803385e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.803385e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.575599 sec +INFO: No Floating Point Exceptions have been reported + 7,560,717,738 cycles # 2.112 GHz + 14,283,918,013 instructions # 1.89 insn per cycle + 3.581217674 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 947ead6bb7..e6a48e18ea 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand 
-Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:55:13 +DATE: 2024-05-16_14:33:12 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.880048e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.082152e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.804795e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 -TOTAL : 0.398295 sec +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.602023e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.319974e+09 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.288580e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.573413 sec INFO: No Floating Point Exceptions have been reported - 911,414,092 cycles:u # 2.228 GHz (75.56%) - 2,219,034 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.94%) - 5,292,087 stalled-cycles-backend:u # 0.58% backend cycles idle (75.81%) - 1,916,223,650 instructions:u # 2.10 insn per cycle - # 0.00 stalled cycles per insn (75.59%) - 0.447965198 seconds time elapsed + 2,248,961,780 cycles # 2.823 GHz + 3,510,545,687 instructions # 1.56 insn per cycle + 0.854969378 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.395142e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.620561e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.620561e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 5.208988 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.082006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.283429e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.283429e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.191643 sec INFO: No Floating Point Exceptions have been reported - 17,164,934,882 cycles:u # 3.291 GHz (75.00%) - 39,709,044 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.00%) - 32,772,868 stalled-cycles-backend:u # 0.19% backend cycles idle (75.00%) - 47,103,841,077 instructions:u # 2.74 insn per cycle - # 0.00 stalled cycles per insn (75.00%) - 5.264288529 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) + 17,740,084,916 cycles # 2.864 GHz + 43,510,870,904 instructions # 2.45 insn per cycle + 6.196630799 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.889573e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.119819e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.119819e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.857430 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.223628e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.400190e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.400190e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.211334 sec INFO: No Floating Point Exceptions have been reported - 9,161,999,343 cycles:u # 3.198 GHz (74.88%) - 41,164,438 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.88%) - 917,958,407 stalled-cycles-backend:u # 10.02% backend cycles idle (75.01%) - 22,149,269,363 instructions:u # 2.42 insn per cycle - # 0.04 stalled cycles per insn (75.15%) - 2.885413922 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) + 9,244,093,548 cycles # 2.875 GHz + 21,907,620,538 instructions # 2.37 insn per cycle + 3.216400901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.345201e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.882374e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.882374e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.561943 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.419643e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.716778e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716778e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.967957 sec INFO: No Floating Point Exceptions have been reported - 8,174,925,567 cycles:u # 3.180 GHz (74.72%) - 44,207,526 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.98%) - 1,253,263,119 stalled-cycles-backend:u # 15.33% backend cycles idle (75.10%) - 15,464,275,545 instructions:u # 1.89 insn per cycle - # 0.08 stalled cycles per insn (75.10%) - 2.576121476 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) + 8,316,472,651 cycles # 2.798 GHz + 15,592,546,873 instructions # 1.87 insn per cycle + 2.972957823 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.415944e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.716005e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716005e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.973842 
sec +INFO: No Floating Point Exceptions have been reported + 8,315,082,395 cycles # 2.793 GHz + 15,436,266,122 instructions # 1.86 insn per cycle + 2.978955673 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.416813e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.677411e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 3.677411e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.974949 sec +INFO: No Floating Point Exceptions have been reported + 6,610,937,423 cycles # 2.219 GHz + 12,863,752,208 instructions # 1.95 insn per cycle + 2.980091060 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 1b97ebb2e9..29c604a610 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,182 +1,231 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand 
-Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_16:26:50 +DATE: 2024-05-16_15:00:26 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.419416e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.312981e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.312981e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 -TOTAL : 5.477791 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.941162e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.420865e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.420865e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.745821 sec INFO: No Floating Point Exceptions have been reported - 17,800,579,188 cycles:u # 3.236 GHz (74.97%) - 120,081,643 stalled-cycles-frontend:u # 0.67% frontend cycles idle (75.00%) - 6,869,674,283 stalled-cycles-backend:u # 38.59% backend cycles idle (75.00%) - 17,127,606,895 instructions:u # 0.96 insn per cycle - # 0.40 stalled cycles per insn (75.03%) - 5.530316842 seconds time elapsed + 5,608,708,868 cycles # 2.838 GHz + 10,190,752,473 instructions # 1.82 insn per cycle + 2.033892232 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.372295e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.590791e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.590791e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 5.346442 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.055357e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.251163e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.251163e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.442554 sec INFO: No Floating Point Exceptions have been reported - 17,427,678,130 cycles:u # 3.252 GHz (74.94%) - 39,859,631 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.93%) - 67,838,113 stalled-cycles-backend:u # 0.39% backend cycles idle (74.96%) - 47,375,316,725 instructions:u # 2.72 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 5.362445974 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) + 18,431,262,224 cycles # 2.859 GHz + 43,659,496,470 instructions # 2.37 insn per cycle + 6.448903506 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.753542e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.867887e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.867887e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.053973 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.110619e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.160777e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.160777e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.492633 sec INFO: No Floating Point Exceptions have been reported - 9,572,820,101 cycles:u # 3.122 GHz (74.95%) - 42,610,043 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.96%) - 957,509,627 stalled-cycles-backend:u # 10.00% backend cycles idle (74.96%) - 23,541,807,394 instructions:u # 2.46 insn per cycle - # 0.04 stalled cycles per insn (74.95%) - 3.069040228 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) + 9,993,500,583 cycles # 2.857 GHz + 23,243,476,984 instructions # 2.33 insn per cycle + 3.498991107 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.187475e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.621381e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.621381e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.755362 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.275264e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.392372e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.392372e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.272099 sec INFO: No Floating Point Exceptions have been reported - 8,527,609,479 cycles:u # 3.081 GHz (74.87%) - 44,341,079 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.86%) - 1,278,366,033 stalled-cycles-backend:u # 14.99% backend cycles idle (74.92%) - 16,646,340,310 instructions:u # 1.95 insn per cycle - # 0.08 stalled cycles per insn (75.07%) - 2.770557791 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) + 9,005,707,266 cycles # 2.748 GHz + 16,711,349,389 instructions # 1.86 insn per cycle + 3.278668519 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.297831e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.443567e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.443567e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.244587 sec +INFO: No Floating Point Exceptions have been reported + 8,928,752,660 cycles # 2.747 GHz + 16,549,135,089 instructions # 1.85 insn per cycle + 3.250993607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.241559e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.306249e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.306249e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.324488 sec +INFO: No Floating Point Exceptions have been reported + 7,378,511,382 cycles # 2.216 GHz + 14,071,008,703 instructions # 1.91 insn per cycle + 3.330848983 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index fdebe20463..8016aaf3c8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_16:33:58 +DATE: 2024-05-16_15:11:30 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.184024e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.064173e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.779084e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371895e-02 +- 3.272985e-06 ) GeV^0 -TOTAL : 4.708045 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.314779e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.179276e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.254245e+09 ) 
sec^-1 +MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 +TOTAL : 1.213916 sec INFO: No Floating Point Exceptions have been reported - 14,980,300,908 cycles:u # 3.189 GHz (74.98%) - 53,407,838 stalled-cycles-frontend:u # 0.36% frontend cycles idle (75.04%) - 6,811,892,378 stalled-cycles-backend:u # 45.47% backend cycles idle (75.04%) - 11,342,919,447 instructions:u # 0.76 insn per cycle - # 0.60 stalled cycles per insn (74.96%) - 4.758589849 seconds time elapsed + 4,089,914,869 cycles # 2.847 GHz + 6,594,462,327 instructions # 1.61 insn per cycle + 1.494122889 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.394442e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.621360e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.621360e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.089350e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.292539e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.292539e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 5.212454 sec +TOTAL : 6.490729 sec INFO: No Floating Point Exceptions have been reported - 17,164,329,028 cycles:u # 3.288 GHz (75.02%) - 39,152,890 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.02%) - 32,697,264 stalled-cycles-backend:u # 0.19% backend cycles idle (75.02%) - 47,185,834,568 instructions:u # 2.75 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 5.221195761 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) + 18,740,706,935 cycles # 2.886 GHz + 43,689,321,367 instructions # 2.33 insn per cycle + 6.495941000 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.933527e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.182670e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.182670e+06 ) sec^-1 +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.213519e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.407340e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.407340e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.825501 sec +TOTAL : 3.565494 sec INFO: No Floating Point Exceptions have been reported - 9,155,103,067 cycles:u # 3.232 GHz (74.87%) - 42,040,255 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.88%) - 918,495,192 stalled-cycles-backend:u # 10.03% backend cycles idle (74.88%) - 22,182,387,469 instructions:u # 2.42 insn per cycle - # 0.04 stalled cycles per insn (75.01%) - 2.836069891 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) + 10,288,737,724 cycles # 2.883 GHz + 21,988,558,280 instructions # 2.14 insn per cycle + 3.570732391 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.425850e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.022951e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.022951e+06 ) sec^-1 +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.392103e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.656361e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.656361e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.509275 sec +TOTAL : 3.347045 sec INFO: No Floating Point Exceptions have been reported - 8,126,245,218 cycles:u # 3.229 GHz (74.89%) - 43,460,542 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.90%) - 1,270,506,684 stalled-cycles-backend:u # 15.63% backend cycles idle (74.90%) - 15,524,237,487 instructions:u # 1.91 insn per cycle - # 0.08 stalled cycles per insn (75.01%) - 2.517716050 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) + 9,294,224,919 cycles # 2.774 GHz + 15,502,535,760 instructions # 1.67 insn per cycle + 3.352354405 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.411853e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.706271e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.706271e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 
3.334245 sec +INFO: No Floating Point Exceptions have been reported + 9,229,877,586 cycles # 2.765 GHz + 15,144,508,612 instructions # 1.64 insn per cycle + 3.339505215 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.389642e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.623022e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.623022e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.358884 sec +INFO: No Floating Point Exceptions have been reported + 7,623,474,420 cycles # 2.266 GHz + 12,573,351,599 instructions # 1.65 insn per cycle + 3.364654068 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index 35ec3b599c..ef6806658f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,176 +1,220 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_16:31:47 +DATE: 2024-05-16_15:06:01 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.191751e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.918687e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.609131e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 -TOTAL : 5.314497 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.717298e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.145539e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143407e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.533112 sec INFO: No Floating Point Exceptions have been reported - 17,580,992,681 cycles:u # 3.298 GHz (74.94%) - 119,557,715 stalled-cycles-frontend:u # 0.68% frontend cycles idle (74.94%) - 6,756,513,568 stalled-cycles-backend:u # 38.43% backend cycles idle (74.97%) - 16,772,724,997 instructions:u # 0.95 insn per cycle - # 0.40 stalled cycles per insn (75.08%) - 5.365969775 seconds time elapsed + 4,980,418,158 cycles # 2.839 GHz + 9,119,342,139 instructions # 1.83 insn per cycle + 1.812784805 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.417462e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.646807e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.646807e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 5.127587 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.069115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.265540e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.265540e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.262605 sec INFO: No Floating Point Exceptions have been reported - 17,233,558,516 cycles:u # 3.356 GHz (74.93%) - 40,280,100 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.92%) - 35,081,862 stalled-cycles-backend:u # 0.20% backend cycles idle (74.94%) - 47,162,582,218 instructions:u # 2.74 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 5.136764939 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 542) (avx2: 0) (512y: 0) (512z: 0) + 17,921,464,120 cycles # 2.860 GHz + 43,508,155,770 instructions # 2.43 insn per cycle + 6.267871711 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.856364e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.058154e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.058154e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.890363 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.205993e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.384022e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.384022e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.237394 sec INFO: No Floating Point Exceptions have been reported - 9,245,390,179 cycles:u # 3.190 GHz (74.88%) - 42,576,371 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.88%) - 951,286,364 stalled-cycles-backend:u # 10.29% backend cycles idle (74.87%) - 22,150,825,461 instructions:u # 2.40 insn per cycle - # 0.04 stalled cycles per insn (74.96%) - 2.899299092 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1883) (avx2: 0) (512y: 0) (512z: 0) + 9,270,351,681 cycles # 2.860 GHz + 21,907,147,046 instructions # 2.36 insn per cycle + 3.242634203 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.368969e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.936597e+06 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.936597e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.551005 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.379256e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.625966e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.625966e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.019248 sec INFO: No Floating Point Exceptions have been reported - 8,148,223,721 cycles:u # 3.185 GHz (74.99%) - 43,084,446 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.98%) - 1,255,704,336 stalled-cycles-backend:u # 15.41% backend cycles idle (74.98%) - 15,486,815,293 instructions:u # 1.90 insn per cycle - # 0.08 stalled cycles per insn (74.98%) - 2.559787773 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) + 8,297,873,717 cycles # 2.744 GHz + 15,590,905,283 instructions # 1.88 insn per cycle + 3.024598202 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2596) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.398987e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.671948e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.671948e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 
3.000824 sec +INFO: No Floating Point Exceptions have been reported + 8,223,361,227 cycles # 2.736 GHz + 15,434,590,756 instructions # 1.88 insn per cycle + 3.006023707 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.360241e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.570739e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.570739e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.046199 sec +INFO: No Floating Point Exceptions have been reported + 6,615,724,908 cycles # 2.169 GHz + 12,863,710,849 instructions # 1.94 insn per cycle + 3.051492012 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index c03e7d3f34..b613786442 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand 
-Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:55:29 +DATE: 2024-05-16_14:33:39 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.912828e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.116110e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.852680e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 -TOTAL : 0.398467 sec +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.604585e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.336536e+09 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.343718e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.571057 sec INFO: No Floating Point Exceptions have been reported - 954,128,617 cycles:u # 2.309 GHz (74.47%) - 2,180,585 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.29%) - 4,555,761 stalled-cycles-backend:u # 0.48% backend cycles idle (74.06%) - 1,856,624,951 instructions:u # 1.95 insn per cycle - # 0.00 stalled cycles per insn (74.63%) - 0.447288567 seconds time elapsed + 2,242,744,669 cycles # 2.822 GHz + 3,531,920,926 instructions # 1.57 insn per cycle + 0.851832101 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.492433e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.754217e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.754217e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.901236 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.153085e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.384746e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.384746e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 5.830325 sec INFO: No Floating Point Exceptions have been reported - 15,963,575,794 cycles:u # 3.252 GHz (74.94%) - 40,480,511 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.92%) - 27,696,228 stalled-cycles-backend:u # 0.17% backend cycles idle (74.93%) - 44,027,177,404 instructions:u # 2.76 insn per cycle - # 0.00 stalled cycles per insn (75.00%) - 4.912881175 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 467) (avx2: 0) (512y: 0) (512z: 0) + 16,691,813,815 cycles # 2.861 GHz + 41,266,181,474 instructions # 2.47 insn per cycle + 5.835359179 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.003327e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.314277e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.314277e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.782850 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.304367e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.587722e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.587722e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.109672 sec INFO: No Floating Point Exceptions have been reported - 8,914,677,636 cycles:u # 3.194 GHz (75.00%) - 40,778,600 stalled-cycles-frontend:u # 0.46% frontend cycles idle (75.06%) - 965,278,963 stalled-cycles-backend:u # 10.83% backend cycles idle (75.06%) - 21,627,615,188 instructions:u # 2.43 insn per cycle - # 0.04 stalled cycles per insn (75.07%) - 2.813249066 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1827) (avx2: 0) (512y: 0) (512z: 0) + 8,995,426,679 cycles # 2.889 GHz + 21,211,089,826 instructions # 2.36 insn per cycle + 3.114839321 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1843) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.412090e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.030398e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.030398e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.519419 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.420820e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.716893e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716893e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.969606 sec INFO: No Floating Point Exceptions have been reported - 8,052,770,656 cycles:u # 3.187 GHz (74.99%) - 43,661,657 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.99%) - 1,618,780,668 stalled-cycles-backend:u # 20.10% backend cycles idle (74.99%) - 15,326,595,516 instructions:u # 1.90 insn per cycle - # 0.11 stalled cycles per insn (74.99%) - 2.529617963 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2542) (512y: 0) (512z: 0) + 8,272,952,138 cycles # 2.782 GHz + 15,425,102,157 instructions # 1.86 insn per cycle + 2.974640700 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2537) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.475869e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.854556e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.854556e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.912175 
sec +INFO: No Floating Point Exceptions have been reported + 8,117,590,540 cycles # 2.783 GHz + 15,233,342,033 instructions # 1.88 insn per cycle + 2.917189383 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.412788e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.668874e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 3.668874e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.982256 sec +INFO: No Floating Point Exceptions have been reported + 6,592,409,084 cycles # 2.208 GHz + 12,843,659,599 instructions # 1.95 insn per cycle + 2.987368722 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1706) (512y: 18) (512z: 1427) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052564145764E-002 +Relative difference = 1.9988585667912256e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index ce1e039a32..e6d46e5965 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_16:14:41 +DATE: 2024-05-16_14:51:09 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.636778e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.079000e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.802981e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 -TOTAL : 0.418904 sec +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.307514e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.195447e+09 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.293637e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.586214 sec INFO: No Floating Point Exceptions have been reported - 964,065,439 cycles:u # 2.411 GHz (74.02%) - 2,055,912 stalled-cycles-frontend:u # 0.21% frontend cycles idle (73.92%) - 4,248,343 stalled-cycles-backend:u # 0.44% backend cycles idle (76.06%) - 1,780,527,270 instructions:u # 1.85 insn per cycle - # 0.00 stalled cycles per insn (76.85%) - 0.469503541 seconds time elapsed + 2,288,759,129 cycles # 2.822 GHz + 3,563,945,826 instructions # 1.56 insn per cycle + 0.869586754 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.895282e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.334516e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.334516e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 3.998723 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.594420e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.075669e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.075669e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 4.325231 sec INFO: No Floating Point Exceptions have been reported - 13,090,879,494 cycles:u # 3.268 GHz (74.95%) - 38,767,383 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.04%) - 829,920,546 stalled-cycles-backend:u # 6.34% backend cycles idle (75.04%) - 37,991,928,680 instructions:u # 2.90 insn per cycle - # 0.02 stalled cycles per insn (75.05%) - 4.010067746 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 833) (avx2: 0) (512y: 0) (512z: 0) + 12,164,411,956 cycles # 2.810 GHz + 32,427,707,417 instructions # 2.67 insn per cycle + 4.330470336 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039543819614E-002 -Relative difference = 3.5561191488957804e-08 +Avg ME (F77/C++) = 1.2828039840314887E-002 +Relative difference = 1.244813035273009e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.520980e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.457619e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.457619e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.459682 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.607791e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.429113e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.429113e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.796823 sec INFO: No Floating Point Exceptions have been reported - 7,921,061,132 cycles:u # 3.211 GHz (75.03%) - 42,543,698 stalled-cycles-frontend:u # 0.54% frontend cycles idle (75.03%) - 559,552,662 stalled-cycles-backend:u # 7.06% backend cycles idle (75.03%) - 18,622,170,728 instructions:u # 2.35 insn per cycle - # 0.03 stalled cycles per insn (75.03%) - 2.472156987 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2808) (avx2: 0) (512y: 0) (512z: 0) + 8,009,111,157 cycles # 2.859 GHz + 18,657,618,704 instructions # 2.33 insn per cycle + 2.802139139 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1555) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039283704129E-002 +Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.743858e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.832207e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.832207e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.363179 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.719759e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.472647e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.472647e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.690741 sec INFO: No Floating Point Exceptions have been reported - 7,395,191,570 cycles:u # 3.120 GHz (75.00%) - 44,075,937 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.03%) - 953,996,053 stalled-cycles-backend:u # 12.90% backend cycles idle (75.03%) - 14,331,799,005 instructions:u # 1.94 insn per cycle - # 0.07 stalled cycles per insn (75.03%) - 2.373336623 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 0) (512z: 0) + 7,416,864,109 cycles # 2.752 GHz + 14,251,974,045 instructions # 1.92 insn per cycle + 2.696083346 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053337216261E-002 -Relative difference = 2.601499261602198e-07 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.774877e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.630440e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.630440e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.645076 
sec +INFO: No Floating Point Exceptions have been reported + 7,291,130,406 cycles # 2.752 GHz + 13,948,384,567 instructions # 1.91 insn per cycle + 2.650598467 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.434115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.741003e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 3.741003e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.962177 sec +INFO: No Floating Point Exceptions have been reported + 6,479,327,720 cycles # 2.184 GHz + 13,423,401,797 instructions # 2.07 insn per cycle + 2.967420151 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052562326775E-002 +Relative difference = 1.997440588685788e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index ad9c8830ac..1851f3246c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_16:14:55 +DATE: 2024-05-16_14:51:33 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.851046e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.118410e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.853046e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270769e-06 ) GeV^0 -TOTAL : 0.385599 sec +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.308899e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.200904e+09 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.329787e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.585570 sec INFO: No Floating Point Exceptions have been reported - 944,031,209 cycles:u # 2.364 GHz (74.16%) - 2,116,405 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.55%) - 5,084,891 stalled-cycles-backend:u # 0.54% backend cycles idle (76.50%) - 1,787,046,210 instructions:u # 1.89 insn per cycle - # 0.00 stalled cycles per insn (76.13%) - 0.436387294 seconds time elapsed + 2,293,480,451 cycles # 2.820 GHz + 3,552,176,680 instructions # 1.55 insn per cycle + 0.870100804 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036033170065E-002 -Relative difference = 1.2498553996774023e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.660871e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.598902e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.598902e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 3.041997 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.129885e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.080551e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.080551e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.330346 sec INFO: No Floating Point Exceptions have been reported - 9,928,178,075 cycles:u # 3.255 GHz (74.85%) - 38,803,000 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.97%) - 19,233,961 stalled-cycles-backend:u # 0.19% backend cycles idle (75.09%) - 28,511,786,830 instructions:u # 2.87 insn per cycle - # 0.00 stalled cycles per insn (75.09%) - 3.053384571 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 632) (avx2: 0) (512y: 0) (512z: 0) + 9,425,530,261 cycles # 2.826 GHz + 25,263,309,757 instructions # 2.68 insn per cycle + 3.335509619 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039838495897E-002 +Relative difference = 1.2589928273811243e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.871089e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.300213e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.300213e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.294096 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.953227e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.493970e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.493970e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.514743 sec INFO: No Floating Point Exceptions have been reported - 7,395,021,784 cycles:u # 3.212 GHz (75.01%) - 41,885,141 stalled-cycles-frontend:u # 0.57% frontend cycles idle (74.98%) - 32,181,008 stalled-cycles-backend:u # 0.44% backend cycles idle (74.98%) - 16,782,482,676 instructions:u # 2.27 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 2.305228466 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2463) (avx2: 0) (512y: 0) (512z: 0) + 7,199,101,915 cycles # 2.858 GHz + 16,870,111,415 instructions # 2.34 insn per cycle + 2.520226033 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.019915e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.467727e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.467727e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.246505 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.869871e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.903620e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.903620e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.568153 sec INFO: No Floating Point Exceptions have been reported - 7,140,857,801 cycles:u # 3.168 GHz (74.81%) - 41,858,018 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.89%) - 382,399,808 stalled-cycles-backend:u # 5.36% backend cycles idle (75.06%) - 13,531,297,023 instructions:u # 1.89 insn per cycle - # 0.03 stalled cycles per insn (75.16%) - 2.257437630 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2082) (512y: 0) (512z: 0) + 7,089,400,745 cycles # 2.756 GHz + 13,616,924,187 instructions # 1.92 insn per cycle + 2.573571442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053331759293E-002 -Relative difference = 2.597245327285885e-07 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.911116e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.034757e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.034757e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.534380 
sec +INFO: No Floating Point Exceptions have been reported + 7,042,060,221 cycles # 2.774 GHz + 13,426,671,587 instructions # 1.91 insn per cycle + 2.539847169 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.525985e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.967300e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 3.967300e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.871748 sec +INFO: No Floating Point Exceptions have been reported + 6,325,625,286 cycles # 2.199 GHz + 13,154,721,049 instructions # 2.08 insn per cycle + 2.877120825 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052536860923E-002 +Relative difference = 1.977588895209662e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 2c4ee4866d..b626a014f8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:55:44 +DATE: 2024-05-16_14:34:05 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.236120e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.113245e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.341425e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.521510 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.830025e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.944832e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.160865e+08 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.662041 sec INFO: No Floating Point Exceptions have been reported - 1,325,273,277 cycles:u # 2.504 GHz (74.48%) - 2,275,284 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.52%) - 5,594,164 stalled-cycles-backend:u # 0.42% backend cycles idle (74.33%) - 2,060,185,234 instructions:u # 1.55 insn per cycle - # 0.00 stalled cycles per insn (74.50%) - 0.574291100 seconds time elapsed + 2,559,219,510 cycles # 2.861 GHz + 3,969,506,530 instructions # 1.55 insn per cycle + 0.952802853 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590281E-002 -Relative difference = 7.67145406542181e-09 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.211934e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.383197e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.383197e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.931262 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.044346e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.221236e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.221236e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.439706 sec INFO: No Floating Point Exceptions have been reported - 19,577,391,797 cycles:u # 3.295 GHz (74.96%) - 51,699,483 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.96%) - 49,221,652 stalled-cycles-backend:u # 0.25% backend cycles idle (74.96%) - 47,079,701,413 instructions:u # 2.40 insn per cycle - # 0.00 stalled cycles per insn (74.97%) - 5.945284341 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 474) (avx2: 0) (512y: 0) (512z: 0) + 18,648,827,254 cycles # 2.894 GHz + 44,218,351,924 instructions # 2.37 insn per cycle + 6.444755062 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.943982e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.472995e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.472995e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.978932 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.634240e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.158489e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.158489e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.270020 sec INFO: No Floating Point Exceptions have been reported - 12,946,839,351 cycles:u # 3.245 GHz (74.96%) - 50,834,055 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.94%) - 2,028,242,845 stalled-cycles-backend:u # 15.67% backend cycles idle (74.95%) - 30,986,816,206 instructions:u # 2.39 insn per cycle - # 0.07 stalled cycles per insn (74.95%) - 3.994417915 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1667) (avx2: 0) (512y: 0) (512z: 0) + 12,337,216,169 cycles # 2.886 GHz + 30,918,100,190 instructions # 2.51 insn per cycle + 4.275170664 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.584797e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.409726e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.409726e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.161946 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.943703e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.696046e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.696046e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.651391 sec INFO: No Floating Point Exceptions have been reported - 10,298,446,422 cycles:u # 3.244 GHz (75.03%) - 49,424,298 stalled-cycles-frontend:u # 0.48% frontend cycles idle (75.05%) - 711,524,247 stalled-cycles-backend:u # 6.91% backend cycles idle (75.05%) - 19,483,845,697 instructions:u # 1.89 insn per cycle - # 0.04 stalled cycles per insn (75.05%) - 3.237591258 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2119) (512y: 0) (512z: 0) + 10,097,284,751 cycles # 2.762 GHz + 19,374,074,587 instructions # 1.92 insn per cycle + 3.656592402 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2130) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.039225e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.880994e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.880994e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.498933 sec +INFO: No Floating Point Exceptions have been reported + 9,699,890,764 cycles # 2.769 
GHz + 18,944,296,026 instructions # 1.95 insn per cycle + 3.504313379 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1860) (512y: 188) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.766168e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.359450e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359450e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 
+TOTAL : 3.977432 sec +INFO: No Floating Point Exceptions have been reported + 8,362,626,878 cycles # 2.101 GHz + 15,058,722,791 instructions # 1.80 insn per cycle + 3.982532855 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1024) (512y: 155) (512z: 1316) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 9ee3035b08..f9780717c1 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-05-16_15:56:03 +DATE: 2024-05-16_14:34:35 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.914851e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.589977e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.912358e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.487955 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.831074e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.944999e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.163112e+08 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.663702 sec INFO: No Floating Point Exceptions have been reported - 1,256,988,238 cycles:u # 2.486 GHz (75.00%) - 2,100,692 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.65%) - 4,451,453 stalled-cycles-backend:u # 0.35% backend cycles idle (74.64%) - 2,022,295,986 instructions:u # 1.61 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 0.540038021 seconds time elapsed + 2,550,713,530 cycles # 2.845 GHz + 3,995,712,636 instructions # 1.57 insn per cycle + 0.958037940 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590284E-002 -Relative difference = 7.67145379496374e-09 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.313434e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.511799e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.511799e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.518687 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.088480e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.281697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.281697e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.193535 sec INFO: No Floating Point Exceptions have been reported - 18,598,926,650 cycles:u # 3.363 GHz (74.98%) - 51,474,971 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.97%) - 37,621,317 stalled-cycles-backend:u # 0.20% backend cycles idle (74.90%) - 44,780,680,502 instructions:u # 2.41 insn per cycle - # 0.00 stalled cycles per insn (74.91%) - 5.533021930 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 498) (avx2: 0) (512y: 0) (512z: 0) + 17,967,058,694 cycles # 2.899 GHz + 42,467,805,223 instructions # 2.36 insn per cycle + 6.198684795 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.026452e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.587298e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.587298e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.830226 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.676284e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.231904e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.231904e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.170684 sec INFO: No Floating Point Exceptions have been reported - 12,685,903,200 cycles:u # 3.302 GHz (75.01%) - 52,800,557 stalled-cycles-frontend:u # 0.42% frontend cycles idle (75.01%) - 1,017,329,956 stalled-cycles-backend:u # 8.02% backend cycles idle (75.01%) - 30,187,527,006 instructions:u # 2.38 insn per cycle - # 0.03 stalled cycles per insn (75.01%) - 3.845010638 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1650) (avx2: 0) (512y: 0) (512z: 0) + 12,134,694,075 cycles # 2.906 GHz + 30,224,929,059 instructions # 2.49 insn per cycle + 4.175943490 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.618417e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.462454e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.462454e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 -TOTAL : 3.131904 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.950812e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.735198e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.735198e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.639278 sec INFO: No Floating Point Exceptions have been reported - 10,285,843,771 cycles:u # 3.272 GHz (74.84%) - 55,711,094 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.93%) - 243,673,079 stalled-cycles-backend:u # 2.37% backend cycles idle (75.06%) - 19,016,668,706 instructions:u # 1.85 insn per cycle - # 0.01 stalled cycles per insn (75.06%) - 3.157506078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2072) (512y: 0) (512z: 0) + 10,078,657,444 cycles # 2.766 GHz + 19,257,126,653 instructions # 1.91 insn per cycle + 3.644365244 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.049769e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.898049e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.898049e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.481211 sec +INFO: No Floating Point Exceptions have been reported + 9,647,917,970 cycles # 2.768 
GHz + 18,746,418,128 instructions # 1.94 insn per cycle + 3.486360008 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1834) (512y: 191) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.796433e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.409552e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.409552e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 
+TOTAL : 3.919050 sec +INFO: No Floating Point Exceptions have been reported + 8,244,471,456 cycles # 2.102 GHz + 14,980,246,059 instructions # 1.82 insn per cycle + 3.924194596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1021) (512y: 156) (512z: 1305) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 5633007fba..205a4bf5b6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:56:20 +DATE: 2024-05-16_14:35:05 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.874005e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.955473e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.009544e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.382065 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.201162e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181610e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277713e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) 
GeV^0 +TOTAL : 0.525559 sec INFO: No Floating Point Exceptions have been reported - 904,604,882 cycles:u # 2.294 GHz (74.56%) - 2,038,263 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.90%) - 5,243,681 stalled-cycles-backend:u # 0.58% backend cycles idle (75.68%) - 1,384,022,248 instructions:u # 1.53 insn per cycle - # 0.00 stalled cycles per insn (75.84%) - 0.434083725 seconds time elapsed + 2,155,305,398 cycles # 2.849 GHz + 3,120,666,963 instructions # 1.45 insn per cycle + 0.814520269 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc 
--all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.550674e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.617768e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.617768e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.290394 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.068773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.129905e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129905e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.176250 sec INFO: No Floating Point Exceptions have been reported - 14,781,492,998 cycles:u # 3.436 GHz (74.90%) - 9,777,978 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.90%) - 1,006,773,006 stalled-cycles-backend:u # 6.81% backend cycles idle (74.99%) - 38,652,569,109 instructions:u # 2.61 insn per cycle - # 0.03 stalled cycles per insn (75.08%) - 4.305596735 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) + 15,001,077,825 cycles # 2.896 GHz + 38,374,710,401 instructions # 2.56 insn per cycle + 5.181415080 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.401958e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.619011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.619011e+05 ) sec^-1 -MeanMatrixElemValue 
= ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.562632 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.492980e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.684039e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.684039e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.109528 sec INFO: No Floating Point Exceptions have been reported - 8,701,354,415 cycles:u # 3.380 GHz (74.84%) - 9,183,222 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.83%) - 765,087,646 stalled-cycles-backend:u # 8.79% backend cycles idle (74.99%) - 24,343,097,716 instructions:u # 2.80 insn per cycle - # 0.03 stalled cycles per insn (75.14%) - 2.577619509 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) + 9,049,547,879 cycles # 2.906 GHz + 24,578,150,431 instructions # 2.72 insn per cycle + 3.114795475 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.701210e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.295923e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.295923e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.542202 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.554648e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.034559e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.034559e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.996517 sec INFO: No Floating Point Exceptions have been reported - 5,119,297,379 cycles:u # 3.295 GHz (74.82%) - 8,889,841 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.79%) - 18,590,632 stalled-cycles-backend:u # 0.36% backend cycles idle (74.79%) - 11,535,225,652 instructions:u # 2.25 insn per cycle - # 0.00 stalled cycles per insn (74.91%) - 1.557171986 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) + 5,443,502,791 cycles # 2.721 GHz + 11,251,469,346 instructions # 2.07 insn per cycle + 2.001703471 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.119114e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.713742e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.713742e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.821745 sec +INFO: No Floating Point Exceptions have been reported + 4,960,408,882 cycles # 2.716 GHz + 10,558,806,229 instructions # 2.13 insn per 
cycle + 1.826903839 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.693426e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.898518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.898518e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.947911 sec +INFO: No Floating Point Exceptions have been reported + 5,367,244,097 
cycles # 1.818 GHz + 7,793,958,391 instructions # 1.45 insn per cycle + 2.953294554 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index f570d067f3..4b2366d44f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,182 +1,231 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to 
be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:27:11 +DATE: 2024-05-16_15:00:55 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.980452e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.801512e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.801512e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.304292 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.373758e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.924060e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.924060e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.825705 sec INFO: No Floating Point Exceptions have been reported - 3,721,346,618 cycles:u # 2.842 GHz (74.99%) - 38,545,810 stalled-cycles-frontend:u # 1.04% frontend cycles idle (74.96%) - 1,166,119,036 stalled-cycles-backend:u # 31.34% backend cycles idle (74.96%) - 3,843,288,243 instructions:u # 1.03 insn per cycle - # 0.30 stalled cycles per insn (75.19%) - 1.359924619 seconds time elapsed + 3,037,157,201 cycles # 2.832 GHz + 4,768,877,833 instructions # 1.57 insn per cycle + 1.128818887 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.478658e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.546075e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.546075e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.504195 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.032947e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.092197e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.092197e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.344113 sec INFO: No Floating Point Exceptions have been reported - 14,881,269,953 cycles:u # 3.289 GHz (74.90%) - 9,330,152 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.95%) - 1,034,575,778 stalled-cycles-backend:u # 6.95% backend cycles idle (75.04%) - 38,662,303,687 instructions:u # 2.60 insn per cycle - # 0.03 stalled cycles per insn (75.07%) - 4.527654262 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) + 15,315,317,736 cycles # 2.863 GHz + 38,433,762,310 instructions # 2.51 insn per cycle + 5.351126978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.219313e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.422253e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.422253e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.756042 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.394451e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.578816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.578816e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.279010 sec INFO: No Floating Point Exceptions have been reported - 8,830,553,012 cycles:u # 3.180 GHz (74.97%) - 9,217,198 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.95%) - 784,099,125 stalled-cycles-backend:u # 8.88% backend cycles idle (74.95%) - 24,543,777,422 instructions:u # 2.78 insn per cycle - # 0.03 stalled cycles per insn (74.95%) - 2.780608984 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) + 9,390,215,737 cycles # 2.859 GHz + 24,761,602,813 instructions # 2.64 insn per cycle + 3.285914811 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.530911e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.101861e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.101861e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.659856 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.346272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.804430e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.804430e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.151067 sec INFO: No Floating Point Exceptions have been reported - 5,313,127,231 cycles:u # 3.161 GHz (74.80%) - 9,346,577 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.76%) - 36,431,573 stalled-cycles-backend:u # 0.69% backend cycles idle (74.81%) - 11,756,634,523 instructions:u # 2.21 insn per cycle - # 0.00 stalled cycles per insn (75.06%) - 1.684064569 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) + 5,795,064,676 cycles # 2.687 GHz + 11,538,955,643 instructions # 1.99 insn per cycle + 2.157987463 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.949125e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.512113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.512113e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.953091 sec +INFO: No Floating Point Exceptions have been reported + 5,277,608,562 cycles # 2.695 GHz + 10,845,633,589 instructions # 2.06 insn per cycle + 1.960046746 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.545325e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.736253e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736253e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.149111 sec +INFO: No Floating Point Exceptions have been reported + 5,725,568,726 cycles # 1.815 GHz + 8,037,864,149 instructions # 1.40 insn per cycle + 3.156036160 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 485bcd5541..66fdf9efe4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:34:18 +DATE: 2024-05-16_15:11:59 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.147117e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.956770e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.010899e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.582142e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.158915e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274993e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.241847 sec +TOTAL : 0.629579 sec INFO: No Floating 
Point Exceptions have been reported - 3,207,936,408 cycles:u # 2.928 GHz (75.24%) - 27,242,339 stalled-cycles-frontend:u # 0.85% frontend cycles idle (75.23%) - 1,155,199,922 stalled-cycles-backend:u # 36.01% backend cycles idle (74.84%) - 2,971,859,727 instructions:u # 0.93 insn per cycle - # 0.39 stalled cycles per insn (74.57%) - 1.292980293 seconds time elapsed + 2,438,671,292 cycles # 2.828 GHz + 3,557,518,240 instructions # 1.46 insn per cycle + 0.918692112 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 
2.527489e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.593674e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.593674e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.063642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.124319e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.124319e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.330667 sec +TOTAL : 5.250994 sec INFO: No Floating Point Exceptions have been reported - 14,767,530,206 cycles:u # 3.400 GHz (74.95%) - 8,678,268 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.96%) - 1,014,325,533 stalled-cycles-backend:u # 6.87% backend cycles idle (74.96%) - 38,729,864,130 instructions:u # 2.62 insn per cycle - # 0.03 stalled cycles per insn (74.95%) - 4.344758665 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) + 15,177,224,624 cycles # 2.888 GHz + 38,389,589,114 instructions # 2.53 insn per cycle + 5.256694767 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.416414e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.635509e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.635509e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
3.469246e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.659787e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.659787e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.557400 sec +TOTAL : 3.192043 sec INFO: No Floating Point Exceptions have been reported - 8,688,901,571 cycles:u # 3.382 GHz (74.78%) - 8,925,341 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.89%) - 771,198,437 stalled-cycles-backend:u # 8.88% backend cycles idle (75.05%) - 24,330,529,250 instructions:u # 2.80 insn per cycle - # 0.03 stalled cycles per insn (75.09%) - 2.570508953 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) + 9,234,869,625 cycles # 2.889 GHz + 24,577,322,685 instructions # 2.66 insn per cycle + 3.197667860 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.657527e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.246679e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.246679e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.520662e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.999169e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.999169e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.552285 sec +TOTAL : 2.069942 sec INFO: No Floating Point Exceptions have been reported - 5,104,259,143 cycles:u # 3.263 GHz (74.95%) - 8,574,846 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.94%) - 17,602,340 stalled-cycles-backend:u # 0.34% backend cycles idle (74.94%) - 11,520,617,764 instructions:u # 2.26 insn per cycle - # 0.00 stalled cycles per insn (74.96%) - 1.590665734 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) + 5,642,462,557 cycles # 2.720 GHz + 11,233,692,701 instructions # 1.99 insn per cycle + 2.075542898 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.151383e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.740134e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.740134e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.875924 sec +INFO: No Floating Point Exceptions have been reported + 5,122,190,825 cycles # 2.724 GHz + 10,508,387,782 instructions # 2.05 
insn per cycle + 1.881606947 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.617306e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.815381e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.815381e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.070946 sec +INFO: No Floating Point Exceptions have been reported + 
5,582,158,144 cycles # 1.816 GHz + 7,742,870,902 instructions # 1.39 insn per cycle + 3.076599052 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 184133b4b5..3db0a99453 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,176 +1,220 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:32:07 +DATE: 2024-05-16_15:06:28 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.838206e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.944221e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.997743e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.178844 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.591450e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.156507e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275190e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.728834 sec INFO: No Floating Point Exceptions have been reported - 3,636,585,685 cycles:u # 3.017 GHz (74.97%) - 39,001,917 stalled-cycles-frontend:u # 1.07% frontend cycles idle (75.09%) - 1,153,410,687 stalled-cycles-backend:u # 31.72% backend cycles idle (74.79%) - 3,894,627,156 instructions:u # 1.07 insn per cycle - # 0.30 stalled cycles per insn (74.83%) - 1.230751921 seconds time elapsed + 2,711,621,820 cycles # 2.826 GHz + 4,288,575,941 instructions # 1.58 insn per cycle + 1.017933550 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.527013e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.595795e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.595795e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.343540 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.045774e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.105988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.105988e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.234926 sec INFO: No Floating Point Exceptions have been reported - 14,768,806,534 cycles:u # 3.391 GHz (74.96%) - 9,491,572 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.02%) - 1,010,953,915 stalled-cycles-backend:u # 6.85% backend cycles idle (75.02%) - 38,646,076,600 instructions:u # 2.62 insn per cycle - # 0.03 stalled cycles per insn (75.02%) - 4.360153127 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 719) (avx2: 0) (512y: 0) (512z: 0) + 14,996,539,700 cycles # 2.862 GHz + 38,373,492,139 instructions # 2.56 insn per cycle + 5.240540958 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.256539e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.461112e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.461112e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.644582 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.431303e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.617753e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617753e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.165641 sec INFO: No Floating Point Exceptions have been reported - 8,627,687,881 cycles:u # 3.250 GHz (75.00%) - 8,854,719 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.99%) - 774,635,987 stalled-cycles-backend:u # 8.98% backend cycles idle (74.99%) - 24,443,738,037 instructions:u # 2.83 insn per cycle - # 0.03 stalled cycles per insn (74.99%) - 2.656284712 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) + 9,072,261,960 cycles # 2.862 GHz + 24,578,342,604 instructions # 2.71 insn per cycle + 3.171145800 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.520387e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.094489e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.094489e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.575969 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.460196e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.936686e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.936686e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.030403 sec INFO: No Floating Point Exceptions have been reported - 5,140,085,504 cycles:u # 3.239 GHz (74.81%) - 11,584,804 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.80%) - 19,445,142 stalled-cycles-backend:u # 0.38% backend cycles idle (74.80%) - 11,526,520,975 instructions:u # 2.24 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 1.588695948 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2396) (512y: 0) (512z: 0) + 5,452,336,471 cycles # 2.679 GHz + 11,251,160,510 instructions # 2.06 insn per cycle + 2.035938093 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.063893e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.649981e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.649981e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.838982 sec +INFO: No Floating Point Exceptions have been reported + 4,938,631,038 cycles # 2.680 GHz + 10,556,930,414 instructions # 2.14 
insn per cycle + 1.844618889 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.589787e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.785615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.785615e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.030446 sec +INFO: No Floating Point Exceptions have been reported + 
5,385,276,295 cycles # 1.774 GHz + 7,793,583,016 instructions # 1.45 insn per cycle + 3.036161028 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1542) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 94cda4d72c..0caf1293cf 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 
'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:56:34 +DATE: 2024-05-16_14:35:28 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.844801e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.918896e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.971923e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.392057 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.206695e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.183658e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279171e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) 
GeV^0 +TOTAL : 0.521961 sec INFO: No Floating Point Exceptions have been reported - 870,834,906 cycles:u # 2.227 GHz (74.83%) - 2,194,838 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.48%) - 5,125,670 stalled-cycles-backend:u # 0.59% backend cycles idle (75.64%) - 1,436,621,970 instructions:u # 1.65 insn per cycle - # 0.00 stalled cycles per insn (75.78%) - 0.442672083 seconds time elapsed + 2,148,802,757 cycles # 2.845 GHz + 3,054,152,486 instructions # 1.42 insn per cycle + 0.812117976 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc 
--all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.452054e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.513152e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.513152e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.457161 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.068168e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.129039e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129039e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.177399 sec INFO: No Floating Point Exceptions have been reported - 15,328,825,337 cycles:u # 3.430 GHz (74.95%) - 9,136,960 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.95%) - 203,789,958 stalled-cycles-backend:u # 1.33% backend cycles idle (74.95%) - 39,537,916,360 instructions:u # 2.58 insn per cycle - # 0.01 stalled cycles per insn (74.98%) - 4.472712291 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 596) (avx2: 0) (512y: 0) (512z: 0) + 15,011,872,798 cycles # 2.897 GHz + 40,100,761,049 instructions # 2.67 insn per cycle + 5.182501125 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.418545e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.641773e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.641773e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.552962 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.634343e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.844834e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.844834e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.993727 sec INFO: No Floating Point Exceptions have been reported - 8,651,232,050 cycles:u # 3.373 GHz (74.99%) - 9,885,837 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.05%) - 1,229,260,585 stalled-cycles-backend:u # 14.21% backend cycles idle (75.05%) - 23,522,160,140 instructions:u # 2.72 insn per cycle - # 0.05 stalled cycles per insn (75.05%) - 2.567902454 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1948) (avx2: 0) (512y: 0) (512z: 0) + 8,671,029,072 cycles # 2.892 GHz + 23,670,969,931 instructions # 2.73 insn per cycle + 2.999072752 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2072) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.876715e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.350584e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.350584e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.708099 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.945254e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.323667e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.323667e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.228700 sec INFO: No Floating Point Exceptions have been reported - 5,677,260,445 cycles:u # 3.298 GHz (74.98%) - 13,332,118 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.91%) - 770,564,901 stalled-cycles-backend:u # 13.57% backend cycles idle (74.69%) - 13,180,087,374 instructions:u # 2.32 insn per cycle - # 0.06 stalled cycles per insn (74.69%) - 1.724279096 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2560) (512y: 0) (512z: 0) + 6,081,438,462 cycles # 2.724 GHz + 13,061,002,322 instructions # 2.15 insn per cycle + 2.233958089 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.205594e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.622405e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.622405e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.121856 sec +INFO: No Floating Point Exceptions have been reported + 5,798,891,312 cycles # 2.727 GHz + 12,319,969,769 instructions # 2.12 insn per 
cycle + 2.127030294 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2093) (512y: 294) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.380432e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.550251e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.550251e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.209519 sec +INFO: No Floating Point Exceptions have been reported + 5,821,355,640 
cycles # 1.812 GHz + 9,603,981,726 instructions # 1.65 insn per cycle + 3.214724733 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1971) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 7cea2da9aa..6af05ea7e1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:15:08 +DATE: 2024-05-16_14:51:55 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.090215e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.962110e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.016244e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.402423 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.681198e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166116e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276872e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) 
GeV^0 +TOTAL : 0.529611 sec INFO: No Floating Point Exceptions have been reported - 857,611,074 cycles:u # 2.179 GHz (75.51%) - 2,136,985 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.78%) - 5,438,902 stalled-cycles-backend:u # 0.63% backend cycles idle (74.78%) - 1,371,416,540 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (75.96%) - 0.454987208 seconds time elapsed + 2,190,477,637 cycles # 2.832 GHz + 3,135,955,530 instructions # 1.43 insn per cycle + 0.830299558 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc 
--all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.815405e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.902732e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.902732e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.910887 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.383572e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.466296e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.466296e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.507918 sec INFO: No Floating Point Exceptions have been reported - 13,171,113,764 cycles:u # 3.358 GHz (74.93%) - 9,313,023 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.92%) - 1,409,492,709 stalled-cycles-backend:u # 10.70% backend cycles idle (74.92%) - 35,891,557,595 instructions:u # 2.73 insn per cycle - # 0.04 stalled cycles per insn (74.95%) - 3.932336347 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1078) (avx2: 0) (512y: 0) (512z: 0) + 13,013,442,526 cycles # 2.884 GHz + 34,387,029,075 instructions # 2.64 insn per cycle + 4.513459426 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.326543e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.537107e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.537107e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.604640 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.946707e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.083881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.083881e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.667816 sec INFO: No Floating Point Exceptions have been reported - 8,622,137,899 cycles:u # 3.296 GHz (74.95%) - 9,155,093 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) - 2,422,668,392 stalled-cycles-backend:u # 28.10% backend cycles idle (74.93%) - 21,931,398,005 instructions:u # 2.54 insn per cycle - # 0.11 stalled cycles per insn (74.94%) - 2.619209121 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2334) (avx2: 0) (512y: 0) (512z: 0) + 10,591,846,077 cycles # 2.884 GHz + 24,007,245,790 instructions # 2.27 insn per cycle + 3.673406920 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.667518e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.109828e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.109828e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.753724 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.532632e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.849376e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.849376e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.423240 sec INFO: No Floating Point Exceptions have been reported - 5,807,189,734 cycles:u # 3.290 GHz (75.10%) - 8,245,915 stalled-cycles-frontend:u # 0.14% frontend cycles idle (75.07%) - 1,884,690,751 stalled-cycles-backend:u # 32.45% backend cycles idle (75.07%) - 11,992,379,155 instructions:u # 2.07 insn per cycle - # 0.16 stalled cycles per insn (75.07%) - 1.767919104 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3062) (512y: 0) (512z: 0) + 6,577,855,979 cycles # 2.709 GHz + 12,401,365,684 instructions # 1.89 insn per cycle + 2.428791768 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3154) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 +Avg ME (F77/C++) = 2.0288063388516200 +Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.754457e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.104775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.104775e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.314834 sec +INFO: No Floating Point Exceptions have been 
reported + 6,233,998,487 cycles # 2.688 GHz + 11,576,068,199 instructions # 1.86 insn per cycle + 2.320534715 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 239) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516200 +Relative difference = 3.2588037208240405e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.687851e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.893233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.893233e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL 
: 2.952132 sec +INFO: No Floating Point Exceptions have been reported + 5,323,772,693 cycles # 1.802 GHz + 9,296,912,008 instructions # 1.75 insn per cycle + 2.957828928 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2115) (512y: 282) (512z: 1958) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 638eaa15d2..2040ec21eb 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:15:22 +DATE: 2024-05-16_14:52:19 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.856498e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.922856e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.976149e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.373820 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.680230e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168644e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280417e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) 
GeV^0 +TOTAL : 0.532171 sec INFO: No Floating Point Exceptions have been reported - 899,977,017 cycles:u # 2.255 GHz (75.57%) - 2,145,266 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.52%) - 5,316,146 stalled-cycles-backend:u # 0.59% backend cycles idle (74.03%) - 1,515,438,363 instructions:u # 1.68 insn per cycle - # 0.00 stalled cycles per insn (73.84%) - 0.425285387 seconds time elapsed + 2,169,507,018 cycles # 2.828 GHz + 3,115,355,964 instructions # 1.44 insn per cycle + 0.826043020 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc 
--all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.238329e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.346767e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.346767e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.423715 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.524819e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.617052e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.617052e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.262483 sec INFO: No Floating Point Exceptions have been reported - 11,725,161,803 cycles:u # 3.413 GHz (74.80%) - 8,705,696 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.92%) - 64,791,846 stalled-cycles-backend:u # 0.55% backend cycles idle (75.04%) - 35,634,475,574 instructions:u # 3.04 insn per cycle - # 0.00 stalled cycles per insn (75.09%) - 3.438838371 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 469) (avx2: 0) (512y: 0) (512z: 0) + 12,358,560,610 cycles # 2.896 GHz + 35,037,446,637 instructions # 2.84 insn per cycle + 4.268207887 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.824583e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.088173e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.088173e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.353161 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.908483e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.040450e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.040450e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.714757 sec INFO: No Floating Point Exceptions have been reported - 7,944,878,439 cycles:u # 3.360 GHz (74.97%) - 9,601,207 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.97%) - 1,446,990,839 stalled-cycles-backend:u # 18.21% backend cycles idle (74.97%) - 21,237,436,715 instructions:u # 2.67 insn per cycle - # 0.07 stalled cycles per insn (74.98%) - 2.371216486 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2088) (avx2: 0) (512y: 0) (512z: 0) + 10,745,562,014 cycles # 2.889 GHz + 23,084,374,218 instructions # 2.15 insn per cycle + 3.720383315 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.972827e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.617255e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.617255e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.497448 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.878271e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.246530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.246530e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.258864 sec INFO: No Floating Point Exceptions have been reported - 4,959,076,198 cycles:u # 3.286 GHz (74.77%) - 8,817,217 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.09%) - 186,742,422 stalled-cycles-backend:u # 3.77% backend cycles idle (75.09%) - 11,331,742,009 instructions:u # 2.29 insn per cycle - # 0.02 stalled cycles per insn (75.09%) - 1.514537087 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2370) (512y: 0) (512z: 0) + 6,151,591,588 cycles # 2.717 GHz + 11,956,808,073 instructions # 1.94 insn per cycle + 2.264473200 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2509) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.958079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.345089e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.345089e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.224234 sec +INFO: No Floating Point Exceptions have been reported + 6,017,653,055 cycles # 2.700 GHz + 11,128,128,624 instructions # 1.85 insn per 
cycle + 2.229785356 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2126) (512y: 174) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.739650e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.951827e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.951827e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.913360 sec +INFO: No Floating Point Exceptions have been reported + 5,212,798,448 
cycles # 1.786 GHz + 9,020,884,070 instructions # 1.73 insn per cycle + 2.919040069 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1650) (512y: 208) (512z: 1567) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index b6a754e57c..93f412dad4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:56:48 +DATE: 2024-05-16_14:35:52 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.535931e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.912917e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.071279e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) GeV^0 -TOTAL : 0.327484 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.088595e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.705968e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.969781e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) 
GeV^0 +TOTAL : 0.482195 sec INFO: No Floating Point Exceptions have been reported - 773,236,796 cycles:u # 2.264 GHz (74.45%) - 2,124,472 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.63%) - 4,788,078 stalled-cycles-backend:u # 0.62% backend cycles idle (74.65%) - 1,358,147,244 instructions:u # 1.76 insn per cycle - # 0.00 stalled cycles per insn (75.30%) - 0.383958616 seconds time elapsed + 2,007,920,858 cycles # 2.849 GHz + 2,840,933,430 instructions # 1.41 insn per cycle + 0.763422225 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.997580e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.087776e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.087776e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.645698 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.200574e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.271569e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.271569e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.850875 sec INFO: No Floating Point Exceptions have been reported - 12,589,223,314 cycles:u # 3.445 GHz (74.87%) - 6,876,391 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.05%) - 1,258,354,294 stalled-cycles-backend:u # 10.00% backend cycles idle (75.05%) - 37,054,233,567 instructions:u # 2.94 insn per cycle - # 0.03 stalled cycles per insn (75.05%) - 3.657725980 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) + 14,073,569,281 cycles # 2.899 GHz + 38,343,239,881 instructions # 2.72 insn per cycle + 4.855897587 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198367925361 -Relative difference = 8.044452636897417e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.721418e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.095346e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.095346e+05 ) sec^-1 -MeanMatrixElemValue 
= ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.986765 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.925449e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.332953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.332953e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.217076 sec INFO: No Floating Point Exceptions have been reported - 6,105,516,697 cycles:u # 3.060 GHz (74.78%) - 7,043,845 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.97%) - 2,117,701,849 stalled-cycles-backend:u # 34.69% backend cycles idle (75.14%) - 15,144,529,164 instructions:u # 2.48 insn per cycle - # 0.14 stalled cycles per insn (75.14%) - 1.998669943 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) + 6,436,588,824 cycles # 2.899 GHz + 15,815,821,412 instructions # 2.46 insn per cycle + 2.222049918 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198773050681 -Relative difference = 6.047600673895608e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.075653e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.212925e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.212925e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.139711 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.963004e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.029520e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.029520e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.256665 sec INFO: No Floating Point Exceptions have been reported - 3,319,451,666 cycles:u # 2.889 GHz (75.10%) - 7,425,440 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.96%) - 1,104,222,098 stalled-cycles-backend:u # 33.27% backend cycles idle (74.94%) - 7,606,072,308 instructions:u # 2.29 insn per cycle - # 0.15 stalled cycles per insn (74.93%) - 1.152514939 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) + 3,455,760,948 cycles # 2.740 GHz + 7,593,976,565 instructions # 2.20 insn per cycle + 1.261861875 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186294492334 -Relative difference = 1.826435805832187e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.569986e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.110539e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110539e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.182427 sec +INFO: No Floating 
Point Exceptions have been reported + 3,244,770,474 cycles # 2.734 GHz + 7,203,559,407 instructions # 2.22 insn per cycle + 1.187623854 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.864494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.605662e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.605662e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 
3.414231e-03 ) GeV^0 +TOTAL : 1.614546 sec +INFO: No Floating Point Exceptions have been reported + 3,050,749,421 cycles # 1.885 GHz + 5,835,755,685 instructions # 1.91 insn per cycle + 1.619564037 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 139359cba7..426db838d7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,182 +1,231 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:27:27 +DATE: 2024-05-16_15:01:19 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.423820e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.053969e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.053969e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 -TOTAL : 1.153350 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.801236e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.462846e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.462846e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.684862 sec INFO: No Floating Point Exceptions have been reported - 3,570,722,038 cycles:u # 3.020 GHz (75.03%) - 21,900,913 stalled-cycles-frontend:u # 0.61% frontend cycles idle (75.10%) - 1,143,439,983 stalled-cycles-backend:u # 32.02% backend cycles idle (75.08%) - 3,893,708,783 instructions:u # 1.09 insn per cycle - # 0.29 stalled cycles per insn (74.61%) - 1.205835268 seconds time elapsed + 2,586,573,508 cycles # 2.828 GHz + 4,016,406,941 instructions # 1.55 insn per cycle + 0.971565490 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.954737e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.042572e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.042572e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.733062 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.176436e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.247449e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.247449e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.948763 sec INFO: No Floating Point Exceptions have been reported - 12,647,578,901 cycles:u # 3.377 GHz (74.99%) - 7,387,293 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) - 1,284,454,821 stalled-cycles-backend:u # 10.16% backend cycles idle (75.01%) - 37,049,023,071 instructions:u # 2.93 insn per cycle - # 0.03 stalled cycles per insn (75.01%) - 3.748399047 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) + 14,176,104,430 cycles # 2.862 GHz + 38,383,843,895 instructions # 2.71 insn per cycle + 4.955194603 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198367925361 -Relative difference = 8.044452636897417e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.263371e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.701339e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.701339e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.870100 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.809798e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.200764e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.200764e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.315849 sec INFO: No Floating Point Exceptions have been reported - 6,176,943,234 cycles:u # 3.281 GHz (74.96%) - 6,834,359 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) - 2,157,535,934 stalled-cycles-backend:u # 34.93% backend cycles idle (74.93%) - 15,462,240,191 instructions:u # 2.50 insn per cycle - # 0.14 stalled cycles per insn (74.95%) - 1.886098189 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) + 6,633,418,276 cycles # 2.858 GHz + 16,095,968,093 instructions # 2.43 insn per cycle + 2.322298973 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198773050681 -Relative difference = 6.047600673895608e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.207397e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.360799e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.360799e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.057297 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.679036e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.925640e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.925640e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.342355 sec INFO: No Floating Point Exceptions have been reported - 3,423,379,937 cycles:u # 3.201 GHz (74.59%) - 7,355,299 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.79%) - 1,119,764,046 stalled-cycles-backend:u # 32.71% backend cycles idle (75.16%) - 7,862,426,800 instructions:u # 2.30 insn per cycle - # 0.14 stalled cycles per insn (75.32%) - 1.072734118 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) + 3,640,592,514 cycles # 2.701 GHz + 7,831,268,120 instructions # 2.15 insn per cycle + 1.348786146 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186294492334 -Relative difference = 1.826435805832187e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.163700e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.056629e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056629e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.278871 sec +INFO: No Floating Point Exceptions have been reported + 3,437,646,895 cycles # 2.676 GHz + 7,439,842,858 instructions # 2.16 insn per cycle + 1.285386542 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.597215e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.292791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.292791e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.727252 sec +INFO: No Floating Point Exceptions have been reported + 3,258,697,081 cycles # 1.881 GHz + 6,089,840,836 instructions # 1.87 insn per cycle + 1.733818978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index c7f370ba8f..884891874e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:34:32 +DATE: 2024-05-16_15:12:23 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.297408e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.914963e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.074175e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.080169e+00 +- 3.463853e-03 ) GeV^0 -TOTAL : 1.034607 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.468958e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.648278e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.971571e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079446e+00 +- 
3.403306e-03 ) GeV^0 +TOTAL : 0.575797 sec INFO: No Floating Point Exceptions have been reported - 3,110,732,271 cycles:u # 3.009 GHz (74.81%) - 10,895,003 stalled-cycles-frontend:u # 0.35% frontend cycles idle (74.84%) - 1,144,475,117 stalled-cycles-backend:u # 36.79% backend cycles idle (75.28%) - 2,805,564,530 instructions:u # 0.90 insn per cycle - # 0.41 stalled cycles per insn (75.34%) - 1.080274590 seconds time elapsed + 2,271,357,910 cycles # 2.845 GHz + 3,342,640,625 instructions # 1.47 insn per cycle + 0.855647595 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 
-EvtsPerSec[Rmb+ME] (23) = ( 2.968527e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.063388e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.063388e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.198151e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.269622e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.269622e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.683255 sec +TOTAL : 4.914840 sec INFO: No Floating Point Exceptions have been reported - 12,618,238,423 cycles:u # 3.419 GHz (74.86%) - 6,998,002 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.92%) - 1,275,040,191 stalled-cycles-backend:u # 10.10% backend cycles idle (75.03%) - 36,998,101,411 instructions:u # 2.93 insn per cycle - # 0.03 stalled cycles per insn (75.08%) - 3.694249748 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) + 14,211,276,974 cycles # 2.889 GHz + 38,370,210,397 instructions # 2.70 insn per cycle + 4.920108721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198367925361 -Relative difference = 8.044452636897417e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.353191e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.790048e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.790048e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL 
: 1.800944 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.892733e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.301573e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.301573e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 2.288479 sec INFO: No Floating Point Exceptions have been reported - 6,110,101,207 cycles:u # 3.377 GHz (74.80%) - 6,856,178 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.81%) - 2,138,374,305 stalled-cycles-backend:u # 35.00% backend cycles idle (74.84%) - 15,165,134,831 instructions:u # 2.48 insn per cycle - # 0.14 stalled cycles per insn (75.05%) - 1.810926152 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) + 6,608,042,838 cycles # 2.882 GHz + 15,829,158,403 instructions # 2.40 insn per cycle + 2.293691008 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198773050681 -Relative difference = 6.047600673895608e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.221950e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.378779e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.378779e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 
+- 3.404208e-03 ) GeV^0 -TOTAL : 1.006094 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.919042e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.023820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.023820e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.319201 sec INFO: No Floating Point Exceptions have been reported - 3,309,075,319 cycles:u # 3.263 GHz (74.76%) - 7,259,054 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.76%) - 1,112,117,116 stalled-cycles-backend:u # 33.61% backend cycles idle (74.78%) - 7,718,708,171 instructions:u # 2.33 insn per cycle - # 0.14 stalled cycles per insn (74.82%) - 1.015424058 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) + 3,618,631,378 cycles # 2.734 GHz + 7,578,247,859 instructions # 2.09 insn per cycle + 1.324366743 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186294492334 -Relative difference = 1.826435805832187e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.492699e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.100151e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100151e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.250231 sec +INFO: No 
Floating Point Exceptions have been reported + 3,418,366,623 cycles # 2.724 GHz + 7,152,275,486 instructions # 2.09 insn per cycle + 1.255758340 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.830732e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.562097e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.562097e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.681529 sec +INFO: No Floating Point Exceptions have been reported + 3,218,452,038 cycles # 1.909 GHz + 5,786,270,960 instructions # 1.80 insn per cycle + 1.686847993 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 1024b8038f..7e3b1fa48e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,176 +1,220 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:32:22 +DATE: 2024-05-16_15:06:52 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.639842e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.882829e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.039851e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 -TOTAL : 1.215821 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.502594e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.623050e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.943883e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.628666 sec INFO: No Floating Point Exceptions have been reported - 3,530,861,000 cycles:u # 2.896 GHz (74.93%) - 22,024,313 stalled-cycles-frontend:u # 0.62% frontend cycles idle (75.13%) - 1,135,065,941 stalled-cycles-backend:u # 32.15% backend cycles idle (75.13%) - 3,804,129,287 instructions:u # 1.08 insn per cycle - # 0.30 stalled cycles per insn (75.02%) - 1.266337670 seconds time elapsed + 2,403,264,425 cycles # 2.820 GHz + 3,734,811,294 instructions # 1.55 insn per cycle + 0.909767197 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.943816e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.034642e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.034642e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.713292 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.185686e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.257300e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.257300e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.884819 sec INFO: No Floating Point Exceptions have been reported - 12,596,850,375 cycles:u # 3.386 GHz (74.87%) - 7,002,203 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.97%) - 1,268,502,926 stalled-cycles-backend:u # 10.07% backend cycles idle (75.06%) - 37,037,500,760 instructions:u # 2.94 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 3.722349634 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 578) (avx2: 0) (512y: 0) (512z: 0) + 13,995,449,913 cycles # 2.863 GHz + 38,340,978,131 instructions # 2.74 insn per cycle + 4.889991891 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198367925361 -Relative difference = 8.044452636897417e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.145526e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.573467e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.573467e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.852312 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.864053e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.263128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.263128e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.244540 sec INFO: No Floating Point Exceptions have been reported - 6,095,909,330 cycles:u # 3.278 GHz (74.98%) - 6,762,916 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.05%) - 2,125,898,597 stalled-cycles-backend:u # 34.87% backend cycles idle (75.05%) - 15,162,942,401 instructions:u # 2.49 insn per cycle - # 0.14 stalled cycles per insn (75.05%) - 1.861268659 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2459) (avx2: 0) (512y: 0) (512z: 0) + 6,436,419,349 cycles # 2.862 GHz + 15,815,556,279 instructions # 2.46 insn per cycle + 2.249779623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198773050681 -Relative difference = 6.047600673895608e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.198895e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.353510e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 1.353510e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.024526 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.799961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.008748e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.008748e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.279634 sec INFO: No Floating Point Exceptions have been reported - 3,351,625,848 cycles:u # 3.247 GHz (74.47%) - 6,982,467 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.79%) - 1,103,052,859 stalled-cycles-backend:u # 32.91% backend cycles idle (75.17%) - 7,621,520,415 instructions:u # 2.27 insn per cycle - # 0.14 stalled cycles per insn (75.20%) - 1.033919829 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3071) (512y: 0) (512z: 0) + 3,447,592,643 cycles # 2.685 GHz + 7,593,708,789 instructions # 2.20 insn per cycle + 1.284877623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3049) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186294492334 -Relative difference = 1.826435805832187e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.434984e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092289e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092289e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.198937 sec +INFO: No 
Floating Point Exceptions have been reported + 3,242,375,801 cycles # 2.694 GHz + 7,202,509,960 instructions # 2.22 insn per cycle + 1.204245270 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 23) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.713311e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.432943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.432943e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.650810 sec +INFO: No Floating Point Exceptions have been reported + 3,050,285,995 cycles # 1.842 GHz + 5,834,789,164 instructions # 1.91 insn per cycle + 1.656446986 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2368) (512y: 24) (512z: 1888) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 5d2c7c47a2..3e123e6fd7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:57:00 +DATE: 2024-05-16_14:36:11 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.721482e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.085896e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.266185e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) GeV^0 -TOTAL : 0.477038 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.096553e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.763289e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.037690e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) 
GeV^0 +TOTAL : 0.480283 sec INFO: No Floating Point Exceptions have been reported - 751,230,740 cycles:u # 1.945 GHz (74.81%) - 2,180,377 stalled-cycles-frontend:u # 0.29% frontend cycles idle (73.52%) - 6,473,295 stalled-cycles-backend:u # 0.86% backend cycles idle (74.74%) - 1,376,631,056 instructions:u # 1.83 insn per cycle - # 0.00 stalled cycles per insn (75.39%) - 0.532053272 seconds time elapsed + 2,036,711,218 cycles # 2.852 GHz + 2,918,453,967 instructions # 1.43 insn per cycle + 0.771336406 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.624046e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.702467e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.702467e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.160753 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.166079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.236793e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.236793e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.926463 sec INFO: No Floating Point Exceptions have been reported - 12,670,532,130 cycles:u # 3.039 GHz (74.90%) - 7,285,144 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.92%) - 9,227,413 stalled-cycles-backend:u # 0.07% backend cycles idle (75.00%) - 37,386,257,114 instructions:u # 2.95 insn per cycle - # 0.00 stalled cycles per insn (75.06%) - 4.173070232 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 503) (avx2: 0) (512y: 0) (512z: 0) + 14,320,299,267 cycles # 2.905 GHz + 39,836,243,439 instructions # 2.78 insn per cycle + 4.931482509 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198367925361 -Relative difference = 8.044452636897417e-08 +Avg ME (F77/C++) = 2.0288199028000236 +Relative difference = 4.790961076489297e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.415156e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.929604e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.929604e+05 ) sec^-1 -MeanMatrixElemValue 
= ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.799517 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.723514e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.285593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.285593e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 1.919156 sec INFO: No Floating Point Exceptions have been reported - 5,366,705,913 cycles:u # 2.967 GHz (74.82%) - 6,883,107 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.81%) - 1,361,757,615 stalled-cycles-backend:u # 25.37% backend cycles idle (74.81%) - 15,224,904,680 instructions:u # 2.84 insn per cycle - # 0.09 stalled cycles per insn (74.98%) - 1.811719603 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2330) (avx2: 0) (512y: 0) (512z: 0) + 5,582,245,803 cycles # 2.902 GHz + 15,285,424,302 instructions # 2.74 insn per cycle + 1.924109376 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2474) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198773050681 -Relative difference = 6.047600673895608e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.808294e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.509321e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
8.509321e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.507388 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.349024e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.991002e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.991002e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.737963 sec INFO: No Floating Point Exceptions have been reported - 4,478,110,129 cycles:u # 2.951 GHz (74.62%) - 6,841,835 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.78%) - 1,661,162,208 stalled-cycles-backend:u # 37.10% backend cycles idle (75.02%) - 9,764,490,974 instructions:u # 2.18 insn per cycle - # 0.17 stalled cycles per insn (75.22%) - 1.560863897 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3750) (512y: 0) (512z: 0) + 4,749,494,972 cycles # 2.726 GHz + 9,735,095,064 instructions # 2.05 insn per cycle + 1.742978161 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3708) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186428369954 -Relative difference = 1.7604478492421832e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182108197361 +Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.536931e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.219273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.219273e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.690263 sec +INFO: No Floating 
Point Exceptions have been reported + 4,623,322,631 cycles # 2.728 GHz + 9,325,575,279 instructions # 2.02 insn per cycle + 1.695318457 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3496) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182108197361 +Relative difference = 1.0391259163456515e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.572579e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.052133e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.052133e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 
3.414231e-03 ) GeV^0 +TOTAL : 1.968034 sec +INFO: No Floating Point Exceptions have been reported + 3,660,831,684 cycles # 1.856 GHz + 7,034,974,988 instructions # 1.92 insn per cycle + 1.973212700 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2610) (512y: 12) (512z: 2220) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183459779248 +Relative difference = 1.7053177021099307e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 3b4ac1f2ba..c7eded0fc2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:15:34 +DATE: 2024-05-16_14:52:42 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.262621e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.917369e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.077168e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) GeV^0 -TOTAL : 0.317044 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.456356e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.657836e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.983561e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) 
GeV^0 +TOTAL : 0.487671 sec INFO: No Floating Point Exceptions have been reported - 786,752,584 cycles:u # 2.316 GHz (73.25%) - 2,044,002 stalled-cycles-frontend:u # 0.26% frontend cycles idle (73.57%) - 5,201,629 stalled-cycles-backend:u # 0.66% backend cycles idle (75.92%) - 1,271,782,822 instructions:u # 1.62 insn per cycle - # 0.00 stalled cycles per insn (76.48%) - 0.366405996 seconds time elapsed + 2,030,099,363 cycles # 2.844 GHz + 2,856,891,631 instructions # 1.41 insn per cycle + 0.771313393 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.193781e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.296393e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.296393e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.430914 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.397227e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.481743e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.481743e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.460576 sec INFO: No Floating Point Exceptions have been reported - 11,744,320,248 cycles:u # 3.415 GHz (74.87%) - 6,783,164 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.88%) - 10,253,426 stalled-cycles-backend:u # 0.09% backend cycles idle (74.97%) - 34,149,232,337 instructions:u # 2.91 insn per cycle - # 0.00 stalled cycles per insn (75.07%) - 3.442357286 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 768) (avx2: 0) (512y: 0) (512z: 0) + 12,588,647,411 cycles # 2.819 GHz + 34,372,288,545 instructions # 2.73 insn per cycle + 4.465853868 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199088536203 -Relative difference = 4.4925808981097166e-08 +Avg ME (F77/C++) = 2.0288199094356969 +Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.330381e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.912028e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.912028e+05 ) sec^-1 -MeanMatrixElemValue 
= ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.578213 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.225217e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.687950e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.687950e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.095644 sec INFO: No Floating Point Exceptions have been reported - 5,330,914,740 cycles:u # 3.361 GHz (74.76%) - 6,901,621 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.80%) - 2,180,786,368 stalled-cycles-backend:u # 40.91% backend cycles idle (74.92%) - 14,553,683,782 instructions:u # 2.73 insn per cycle - # 0.15 stalled cycles per insn (75.14%) - 1.589188713 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2947) (avx2: 0) (512y: 0) (512z: 0) + 6,085,238,066 cycles # 2.897 GHz + 14,860,574,019 instructions # 2.44 insn per cycle + 2.101017455 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198769558221 -Relative difference = 6.06481491495597e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193803280592 +Relative difference = 1.8746278463897685e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.171501e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.002861e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.002861e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.290076 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.969640e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.750011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.750011e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.592133 sec INFO: No Floating Point Exceptions have been reported - 4,203,310,590 cycles:u # 3.237 GHz (74.86%) - 7,035,707 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.75%) - 1,650,350,604 stalled-cycles-backend:u # 39.26% backend cycles idle (74.76%) - 9,089,278,343 instructions:u # 2.16 insn per cycle - # 0.18 stalled cycles per insn (74.79%) - 1.301799243 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4501) (512y: 0) (512z: 0) + 4,316,607,801 cycles # 2.703 GHz + 9,028,975,402 instructions # 2.09 insn per cycle + 1.597664902 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4443) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186752004549 -Relative difference = 1.6009291367898262e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181999931112 +Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.187100e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.023996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.023996e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.545422 sec +INFO: No Floating 
Point Exceptions have been reported + 4,204,195,380 cycles # 2.712 GHz + 8,663,569,400 instructions # 2.06 insn per cycle + 1.550927334 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4243) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181999931112 +Relative difference = 9.857617164523888e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.251438e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.680453e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.680453e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 
3.414231e-03 ) GeV^0 +TOTAL : 2.083936 sec +INFO: No Floating Point Exceptions have been reported + 3,833,998,104 cycles # 1.836 GHz + 7,808,361,622 instructions # 2.04 insn per cycle + 2.089489123 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4424) (512y: 0) (512z: 2555) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183246739209 +Relative difference = 1.6003107281264138e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index cb765f6595..aad34f68a4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:15:46 +DATE: 2024-05-16_14:53:02 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.739392e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.097312e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.278679e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078079e+00 +- 3.394933e-03 ) GeV^0 -TOTAL : 0.347002 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.520611e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.721194e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.056652e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) 
GeV^0 +TOTAL : 0.485221 sec INFO: No Floating Point Exceptions have been reported - 759,139,838 cycles:u # 2.271 GHz (72.72%) - 2,093,326 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.90%) - 5,604,886 stalled-cycles-backend:u # 0.74% backend cycles idle (76.10%) - 1,256,158,675 instructions:u # 1.65 insn per cycle - # 0.00 stalled cycles per insn (76.19%) - 0.393998470 seconds time elapsed + 2,023,639,378 cycles # 2.841 GHz + 2,891,046,466 instructions # 1.43 insn per cycle + 0.769493206 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173652952537 -Relative difference = 1.1658506339321586e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.464648e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.585788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.585788e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.172917 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.614708e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.719370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.719370e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.097789 sec INFO: No Floating Point Exceptions have been reported - 10,837,277,551 cycles:u # 3.407 GHz (74.86%) - 7,096,693 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.93%) - 12,679,835 stalled-cycles-backend:u # 0.12% backend cycles idle (75.05%) - 35,363,647,748 instructions:u # 3.26 insn per cycle - # 0.00 stalled cycles per insn (75.11%) - 3.184330509 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 469) (avx2: 0) (512y: 0) (512z: 0) + 11,755,034,517 cycles # 2.866 GHz + 35,108,588,793 instructions # 2.99 insn per cycle + 4.103114971 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199088536203 -Relative difference = 4.4925808981097166e-08 +Avg ME (F77/C++) = 2.0288199094356969 +Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.898283e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.586422e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.586422e+05 ) sec^-1 -MeanMatrixElemValue 
= ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.473160 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.332294e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.809853e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.809853e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.053683 sec INFO: No Floating Point Exceptions have been reported - 4,987,746,148 cycles:u # 3.368 GHz (74.67%) - 7,158,673 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.90%) - 1,207,982,497 stalled-cycles-backend:u # 24.22% backend cycles idle (75.16%) - 13,978,550,667 instructions:u # 2.80 insn per cycle - # 0.09 stalled cycles per insn (75.16%) - 1.484769395 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2487) (avx2: 0) (512y: 0) (512z: 0) + 5,951,415,517 cycles # 2.891 GHz + 14,470,123,335 instructions # 2.43 insn per cycle + 2.059025817 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198892958462 -Relative difference = 5.4565783974899003e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193583255634 +Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.022259e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.129743e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.129743e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.171796 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.326940e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.191185e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.191185e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.518155 sec INFO: No Floating Point Exceptions have been reported - 3,896,994,070 cycles:u # 3.303 GHz (74.94%) - 7,825,414 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.92%) - 1,415,466,671 stalled-cycles-backend:u # 36.32% backend cycles idle (74.92%) - 8,569,308,194 instructions:u # 2.20 insn per cycle - # 0.17 stalled cycles per insn (74.92%) - 1.182924741 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3422) (512y: 0) (512z: 0) + 4,152,217,913 cycles # 2.727 GHz + 8,874,854,960 instructions # 2.14 insn per cycle + 1.523530355 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3574) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186836987734 -Relative difference = 1.559041129563128e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182107033208 +Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.326335e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.192412e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.192412e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.518142 sec +INFO: No Floating 
Point Exceptions have been reported + 4,138,145,120 cycles # 2.717 GHz + 8,411,511,000 instructions # 2.03 insn per cycle + 1.523559219 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3319) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182107033208 +Relative difference = 1.0385521077446488e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.337364e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.777859e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.777859e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 
3.414231e-03 ) GeV^0 +TOTAL : 2.053123 sec +INFO: No Floating Point Exceptions have been reported + 3,784,038,038 cycles # 1.840 GHz + 7,702,433,783 instructions # 2.04 insn per cycle + 2.058532499 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3440) (512y: 0) (512z: 2107) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183204829693 +Relative difference = 1.5796536184903122e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 970ea8edab..ff88d5da2d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:57:14 +DATE: 2024-05-16_14:36:32 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.795249e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.009788e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.065293e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.461697 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.198792e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.180605e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275668e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) 
GeV^0 +TOTAL : 0.521467 sec INFO: No Floating Point Exceptions have been reported - 895,448,377 cycles:u # 2.024 GHz (75.08%) - 2,236,641 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.45%) - 6,321,635 stalled-cycles-backend:u # 0.71% backend cycles idle (75.11%) - 1,449,839,670 instructions:u # 1.62 insn per cycle - # 0.00 stalled cycles per insn (75.29%) - 0.518730258 seconds time elapsed + 2,143,649,339 cycles # 2.843 GHz + 3,098,162,725 instructions # 1.45 insn per cycle + 0.810608393 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243869 -Relative difference = 3.241686434838304e-07 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` 
= 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.180427e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.235310e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.235310e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.012988 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.033714e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.092456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.092456e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.262850 sec INFO: No Floating Point Exceptions have been reported - 15,188,686,492 cycles:u # 3.022 GHz (75.00%) - 9,992,311 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.01%) - 322,116,034 stalled-cycles-backend:u # 2.12% backend cycles idle (75.01%) - 39,218,384,975 instructions:u # 2.58 insn per cycle - # 0.01 stalled cycles per insn (75.01%) - 5.042832677 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 740) (avx2: 0) (512y: 0) (512z: 0) + 15,278,986,093 cycles # 2.901 GHz + 38,575,389,182 instructions # 2.52 insn per cycle + 5.268064562 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.932926e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.126192e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.126192e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.869194 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.527314e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.723139e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.723139e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.080390 sec INFO: No Floating Point Exceptions have been reported - 8,570,803,625 cycles:u # 2.974 GHz (74.98%) - 9,411,933 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.02%) - 1,791,447,510 stalled-cycles-backend:u # 20.90% backend cycles idle (75.03%) - 24,011,968,827 instructions:u # 2.80 insn per cycle - # 0.07 stalled cycles per insn (75.03%) - 2.887958027 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2102) (avx2: 0) (512y: 0) (512z: 0) + 8,961,614,258 cycles # 2.906 GHz + 24,226,315,758 instructions # 2.70 insn per cycle + 3.085434765 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.935252e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.478607e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.478607e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.715190 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.613394e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.100134e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.100134e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.976346 sec INFO: No Floating Point Exceptions have been reported - 5,004,335,960 cycles:u # 2.896 GHz (75.01%) - 8,270,955 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.01%) - 1,428,725,699 stalled-cycles-backend:u # 28.55% backend cycles idle (75.00%) - 11,394,010,172 instructions:u # 2.28 insn per cycle - # 0.13 stalled cycles per insn (75.00%) - 1.731479036 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 0) (512z: 0) + 5,394,338,439 cycles # 2.724 GHz + 11,277,527,499 instructions # 2.09 insn per cycle + 1.981499886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063930599014 -Relative difference = 2.9916108265801754e-07 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.276948e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.897611e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.897611e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.778784 sec +INFO: No Floating Point Exceptions have been 
reported + 4,855,499,941 cycles # 2.723 GHz + 10,526,571,188 instructions # 2.17 insn per cycle + 1.784170390 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.815864e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.036087e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.036087e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 
2.856419 sec +INFO: No Floating Point Exceptions have been reported + 5,199,981,370 cycles # 1.818 GHz + 7,603,665,117 instructions # 1.46 insn per cycle + 2.861804972 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index dcef71b8d4..1d76304278 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_15:57:29 +DATE: 2024-05-16_14:36:55 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.814587e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.921539e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.974776e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.455134 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.208651e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.184994e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280716e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) 
GeV^0 +TOTAL : 0.522551 sec INFO: No Floating Point Exceptions have been reported - 885,238,248 cycles:u # 2.013 GHz (74.14%) - 2,253,424 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.15%) - 6,274,029 stalled-cycles-backend:u # 0.71% backend cycles idle (75.38%) - 1,416,790,914 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (76.34%) - 0.512623577 seconds time elapsed + 2,145,230,616 cycles # 2.840 GHz + 3,093,123,772 instructions # 1.44 insn per cycle + 0.812278354 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243869 -Relative difference = 3.241686434838304e-07 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` 
= 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.434028e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.498307e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.498307e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.501425 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.021911e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.079930e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.079930e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.294031 sec INFO: No Floating Point Exceptions have been reported - 15,391,438,023 cycles:u # 3.410 GHz (75.01%) - 9,507,342 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) - 18,410,071 stalled-cycles-backend:u # 0.12% backend cycles idle (75.01%) - 40,052,703,727 instructions:u # 2.60 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 4.526661105 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) + 15,341,153,400 cycles # 2.896 GHz + 40,370,282,827 instructions # 2.63 insn per cycle + 5.299425936 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.548993e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.785959e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.785959e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.484330 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.710012e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.926494e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.926494e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.935201 sec INFO: No Floating Point Exceptions have been reported - 8,410,602,245 cycles:u # 3.370 GHz (75.01%) - 9,858,507 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.00%) - 393,451,624 stalled-cycles-backend:u # 4.68% backend cycles idle (75.00%) - 23,510,306,264 instructions:u # 2.80 insn per cycle - # 0.02 stalled cycles per insn (75.00%) - 2.513065225 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1993) (avx2: 0) (512y: 0) (512z: 0) + 8,515,314,447 cycles # 2.897 GHz + 23,253,613,819 instructions # 2.73 insn per cycle + 2.940392108 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.841073e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.308539e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.308539e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.712883 sec +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.780066e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.132607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.132607e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.302191 sec INFO: No Floating Point Exceptions have been reported - 5,702,695,154 cycles:u # 3.306 GHz (74.99%) - 11,257,877 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.95%) - 757,110,103 stalled-cycles-backend:u # 13.28% backend cycles idle (74.95%) - 13,044,940,417 instructions:u # 2.29 insn per cycle - # 0.06 stalled cycles per insn (74.97%) - 1.728031458 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2711) (512y: 0) (512z: 0) + 6,262,262,467 cycles # 2.715 GHz + 12,962,490,062 instructions # 2.07 insn per cycle + 2.307689771 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063930599014 -Relative difference = 2.9916108265801754e-07 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.109643e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.511847e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.511847e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.160493 sec +INFO: No Floating Point Exceptions have been 
reported + 5,903,466,716 cycles # 2.727 GHz + 12,238,680,442 instructions # 2.07 insn per cycle + 2.165768560 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.507940e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.694154e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.694154e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 
3.097205 sec +INFO: No Floating Point Exceptions have been reported + 5,614,268,818 cycles # 1.810 GHz + 8,744,074,840 instructions # 1.56 insn per cycle + 3.102417520 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index c3705c5c6b..1d7490861d 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:57:43 +DATE: 2024-05-16_14:37:19 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.891318e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.028000e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.031492e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.390466 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.992211e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.047041e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.061161e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 
5.001681e+01 ) GeV^-2 +TOTAL : 0.469322 sec INFO: No Floating Point Exceptions have been reported - 1,063,597,301 cycles:u # 2.575 GHz (74.08%) - 2,246,178 stalled-cycles-frontend:u # 0.21% frontend cycles idle (73.31%) - 5,976,134 stalled-cycles-backend:u # 0.56% backend cycles idle (74.43%) - 1,546,122,425 instructions:u # 1.45 insn per cycle - # 0.00 stalled cycles per insn (74.52%) - 0.434332130 seconds time elapsed + 1,970,950,644 cycles # 2.853 GHz + 2,836,233,202 instructions # 1.44 insn per cycle + 0.747868437 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.616703e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.842455e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.847657e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.624660 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.129686e+07 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.329949e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.341716e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.607217 sec INFO: No Floating Point Exceptions have been reported - 1,725,391,280 cycles:u # 2.688 GHz (74.98%) - 2,211,883 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.19%) - 5,501,560 stalled-cycles-backend:u # 0.32% backend cycles idle (75.11%) - 2,048,112,265 instructions:u # 1.19 insn per cycle - # 0.00 stalled cycles per insn (75.11%) - 0.673690421 seconds time elapsed + 2,397,125,482 cycles # 2.825 GHz + 3,658,262,516 instructions # 1.53 insn per cycle + 0.909559944 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418642 -Relative difference = 4.4692399933517674e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads 
/ `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.966593e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.979040e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.979040e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.551061 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.379379e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.391311e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391311e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.910347 sec INFO: No Floating Point Exceptions have been reported - 19,445,623,666 cycles:u # 3.501 GHz (74.94%) - 2,647,514 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.94%) - 3,109,224,911 stalled-cycles-backend:u # 15.99% backend cycles idle (74.95%) - 57,883,703,782 instructions:u # 2.98 insn per cycle - # 0.05 stalled cycles per insn (75.02%) - 5.557879289 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1134) (avx2: 0) (512y: 0) (512z: 0) + 19,789,020,586 cycles # 2.863 GHz + 59,609,829,111 instructions # 3.01 insn per cycle + 6.914699001 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432431 -Relative difference = 4.4692302355460254e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.963083e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.012897e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.012897e+04 ) sec^-1 
-MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.775187 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.619966e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.665049e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.665049e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.569460 sec INFO: No Floating Point Exceptions have been reported - 9,655,531,377 cycles:u # 3.475 GHz (74.97%) - 2,446,914 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.96%) - 2,489,896,593 stalled-cycles-backend:u # 25.79% backend cycles idle (74.96%) - 29,866,888,415 instructions:u # 3.09 insn per cycle - # 0.08 stalled cycles per insn (74.96%) - 2.781688940 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4742) (avx2: 0) (512y: 0) (512z: 0) + 10,374,266,250 cycles # 2.904 GHz + 30,674,256,165 instructions # 2.96 insn per cycle + 3.573646642 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.233857e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.254984e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.254984e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.355463 sec +OMP threads 
/ `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.120184e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.293257e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.293257e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.820051 sec INFO: No Floating Point Exceptions have been reported - 4,723,300,885 cycles:u # 3.473 GHz (75.06%) - 1,988,659 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.87%) - 1,565,675,561 stalled-cycles-backend:u # 33.15% backend cycles idle (74.72%) - 11,209,857,831 instructions:u # 2.37 insn per cycle - # 0.14 stalled cycles per insn (74.67%) - 1.374600251 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4396) (512y: 0) (512z: 0) + 4,901,380,147 cycles # 2.688 GHz + 11,019,047,598 instructions # 2.25 insn per cycle + 1.824311195 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.028182e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.049956e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.049956e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.616748 sec +INFO: No Floating Point Exceptions have been reported + 4,378,615,331 cycles # 2.702 GHz + 10,296,117,856 instructions # 2.35 
insn per cycle + 1.621129053 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.954224e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.056280e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.056280e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.381742 sec +INFO: No Floating Point Exceptions have been reported + 
4,108,596,097 cycles # 1.723 GHz + 5,842,404,115 instructions # 1.42 insn per cycle + 2.385936782 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index de130d0971..45a1ef164b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_16:27:39 +DATE: 2024-05-16_15:01:40 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.477984e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.990536e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.990536e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.591147 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.535443e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.780857e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.780857e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.503670 sec INFO: No Floating Point Exceptions have been reported - 1,657,650,273 cycles:u # 2.808 GHz (74.93%) - 6,517,743 stalled-cycles-frontend:u # 0.39% frontend cycles idle (75.03%) - 294,023,494 stalled-cycles-backend:u # 17.74% backend cycles idle (75.71%) - 2,018,685,371 instructions:u # 1.22 insn per cycle - # 0.15 stalled cycles per insn (75.16%) - 0.639012900 seconds time elapsed + 2,012,376,201 cycles # 2.812 GHz + 3,006,218,540 instructions # 1.49 insn per cycle + 0.774572160 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.209566e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.681342e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.681342e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.948724e+03 +- 1.840727e+03 ) GeV^-2 -TOTAL : 1.285664 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.606024e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.624765e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.624765e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.841754 sec INFO: No Floating Point Exceptions have been reported - 3,837,444,034 cycles:u # 2.925 GHz (75.01%) - 16,902,808 stalled-cycles-frontend:u # 0.44% frontend cycles idle (75.01%) - 854,341,995 stalled-cycles-backend:u # 22.26% backend cycles idle (75.09%) - 3,891,087,275 instructions:u # 1.01 insn per cycle - # 0.22 stalled cycles per insn (74.98%) - 1.342450877 seconds time elapsed + 3,099,668,806 cycles # 2.832 GHz + 4,993,276,525 instructions # 1.61 insn per cycle + 1.155254157 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418642 -Relative difference = 4.4692399933517674e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.930505e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.942916e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.942916e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.623528 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.380068e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.392068e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.392068e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.915910 sec INFO: No Floating Point Exceptions have been reported - 19,574,655,100 cycles:u # 3.478 GHz (74.98%) - 2,123,896 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) - 3,108,477,010 stalled-cycles-backend:u # 15.88% backend cycles idle (74.98%) - 57,822,618,304 instructions:u # 2.95 insn per cycle - # 0.05 stalled cycles per insn (74.98%) - 5.630887153 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1134) (avx2: 0) (512y: 0) (512z: 0) + 19,806,579,322 cycles # 2.863 GHz + 59,611,012,266 instructions # 3.01 insn per cycle + 6.920308116 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432431 -Relative difference = 4.4692302355460254e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.012452e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.062835e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.062835e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.757025 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.550339e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.594733e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.594733e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.631431 sec INFO: No Floating Point Exceptions have been reported - 9,670,746,360 cycles:u # 3.503 GHz (74.82%) - 2,470,367 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.92%) - 2,490,285,908 stalled-cycles-backend:u # 25.75% backend cycles idle (75.06%) - 29,902,537,167 instructions:u # 3.09 insn per cycle - # 0.08 stalled cycles per insn (75.08%) - 2.763871519 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4742) (avx2: 0) (512y: 0) (512z: 0) + 10,404,134,292 cycles # 2.862 GHz + 30,722,305,980 instructions # 2.95 insn per cycle + 3.635916319 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.229766e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.250819e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.250819e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.364425 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.991824e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.166141e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.166141e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.853778 sec INFO: No Floating Point Exceptions have been reported - 4,761,520,184 cycles:u # 3.479 GHz (74.91%) - 2,281,684 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.87%) - 1,565,249,273 stalled-cycles-backend:u # 32.87% backend cycles idle (74.87%) - 11,208,960,590 instructions:u # 2.35 insn per cycle - # 0.14 stalled cycles per insn (74.86%) - 1.371395791 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4396) (512y: 0) (512z: 0) + 4,943,570,309 cycles # 2.661 GHz + 11,067,752,215 instructions # 2.24 insn per cycle + 1.858370590 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.005140e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.026682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.026682e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.662867 sec +INFO: No Floating Point Exceptions have been reported + 4,426,260,539 cycles # 2.656 GHz + 10,346,882,831 instructions # 2.34 insn per cycle + 1.667431238 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.832038e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.932754e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.932754e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.431611 sec +INFO: No Floating Point Exceptions have been reported + 4,145,808,516 cycles # 1.702 GHz + 5,880,428,508 instructions # 1.42 insn per cycle + 2.436095886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 4db177d1cc..c8d4c1d012 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:58:00 +DATE: 2024-05-16_14:37:45 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.740149e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002618e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.006335e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.434871 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.984938e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.044546e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056865e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 
5.001681e+01 ) GeV^-2 +TOTAL : 0.468609 sec INFO: No Floating Point Exceptions have been reported - 1,055,077,000 cycles:u # 2.553 GHz (75.94%) - 2,245,399 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.77%) - 5,490,265 stalled-cycles-backend:u # 0.52% backend cycles idle (74.87%) - 1,496,542,639 instructions:u # 1.42 insn per cycle - # 0.00 stalled cycles per insn (74.48%) - 0.482545681 seconds time elapsed + 1,981,002,182 cycles # 2.846 GHz + 2,842,945,772 instructions # 1.44 insn per cycle + 0.752497111 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.534209e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.812283e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.817336e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.620606 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.119070e+07 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.315352e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326681e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.601408 sec INFO: No Floating Point Exceptions have been reported - 1,701,929,126 cycles:u # 2.681 GHz (75.15%) - 2,195,017 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.26%) - 5,014,423 stalled-cycles-backend:u # 0.29% backend cycles idle (74.92%) - 2,070,616,173 instructions:u # 1.22 insn per cycle - # 0.00 stalled cycles per insn (74.34%) - 0.670567962 seconds time elapsed + 2,383,936,937 cycles # 2.851 GHz + 3,651,729,049 instructions # 1.53 insn per cycle + 0.896728355 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418642 -Relative difference = 4.4692399933517674e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads 
/ `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.959702e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.972085e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.972085e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.563715 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.454763e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.467389e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.467389e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.698212 sec INFO: No Floating Point Exceptions have been reported - 19,472,101,807 cycles:u # 3.497 GHz (75.00%) - 2,608,263 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) - 3,636,439,555 stalled-cycles-backend:u # 18.68% backend cycles idle (75.00%) - 57,674,794,693 instructions:u # 2.96 insn per cycle - # 0.06 stalled cycles per insn (75.00%) - 5.602683884 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1087) (avx2: 0) (512y: 0) (512z: 0) + 19,500,935,732 cycles # 2.911 GHz + 58,799,003,967 instructions # 3.02 insn per cycle + 6.702449206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432431 -Relative difference = 4.4692302355460254e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.009055e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.059724e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.059724e+04 ) sec^-1 
-MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.754182 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.669930e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.715854e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.715854e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.531511 sec INFO: No Floating Point Exceptions have been reported - 9,655,627,230 cycles:u # 3.501 GHz (74.89%) - 2,380,586 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) - 2,402,259,382 stalled-cycles-backend:u # 24.88% backend cycles idle (75.06%) - 30,249,149,231 instructions:u # 3.13 insn per cycle - # 0.08 stalled cycles per insn (75.05%) - 2.765687472 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4806) (avx2: 0) (512y: 0) (512z: 0) + 10,228,095,464 cycles # 2.894 GHz + 30,347,180,891 instructions # 2.97 insn per cycle + 3.535798492 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.218546e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.239135e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.239135e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.371826 sec +OMP threads 
/ `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.789972e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.950829e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.950829e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.887432 sec INFO: No Floating Point Exceptions have been reported - 4,796,137,030 cycles:u # 3.487 GHz (75.03%) - 2,128,645 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.99%) - 1,512,643,237 stalled-cycles-backend:u # 31.54% backend cycles idle (74.99%) - 11,610,627,473 instructions:u # 2.42 insn per cycle - # 0.13 stalled cycles per insn (74.99%) - 1.378449176 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4489) (512y: 0) (512z: 0) + 5,055,118,079 cycles # 2.674 GHz + 11,484,444,983 instructions # 2.27 insn per cycle + 1.891612421 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.667837e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.860484e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.860484e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.718788 sec +INFO: No Floating Point Exceptions have been reported + 4,655,858,880 cycles # 2.704 GHz + 10,842,096,596 instructions # 2.33 
insn per cycle + 1.722993406 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.981237e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.082937e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.082937e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.372571 sec +INFO: No Floating Point Exceptions have been reported + 
4,129,142,877 cycles # 1.738 GHz + 6,106,185,085 instructions # 1.48 insn per cycle + 2.376879303 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index d11c60b43f..e4bc7cf2cc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing 
to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:58:16 +DATE: 2024-05-16_14:38:10 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.721105e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.902816e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.946296e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.415474e+04 +- 1.288238e+04 ) GeV^-2 -TOTAL : 0.373672 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.514552e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.271085e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.366020e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 
5.002447e+01 ) GeV^-2 +TOTAL : 0.450662 sec INFO: No Floating Point Exceptions have been reported - 808,129,453 cycles:u # 2.344 GHz (73.73%) - 2,150,599 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.86%) - 5,396,964 stalled-cycles-backend:u # 0.67% backend cycles idle (74.68%) - 1,382,264,296 instructions:u # 1.71 insn per cycle - # 0.00 stalled cycles per insn (75.08%) - 0.427180204 seconds time elapsed + 1,888,418,045 cycles # 2.834 GHz + 2,686,004,303 instructions # 1.42 insn per cycle + 0.722549365 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.377466e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.631328e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.636252e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.619620e+05 +- 1.611328e+05 ) GeV^-2 -TOTAL : 0.447870 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.424662e+07 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.459806e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.527254e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 +TOTAL : 0.495261 sec INFO: No Floating Point Exceptions have been reported - 1,170,076,475 cycles:u # 2.573 GHz (75.47%) - 2,155,939 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.13%) - 4,646,286 stalled-cycles-backend:u # 0.40% backend cycles idle (75.63%) - 1,543,400,318 instructions:u # 1.32 insn per cycle - # 0.00 stalled cycles per insn (75.96%) - 0.493199609 seconds time elapsed + 2,099,817,827 cycles # 2.862 GHz + 2,990,738,948 instructions # 1.42 insn per cycle + 0.790419941 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.412404e+00 -Avg ME (F77/GPU) = 1.4131669530965212 -Relative difference = 0.0005401804983001964 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412608e+00 +Avg ME (F77/GPU) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.230818e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.246017e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.246017e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 5.097129 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.505220e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.518346e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.518346e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.562288 sec INFO: No Floating Point Exceptions have been reported - 17,870,197,691 cycles:u # 3.503 GHz (74.92%) - 2,354,191 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.91%) - 3,156,273,545 stalled-cycles-backend:u # 17.66% backend cycles idle (74.96%) - 55,170,458,904 instructions:u # 3.09 insn per cycle - # 0.06 stalled cycles per insn (75.04%) - 5.103820232 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1229) (avx2: 0) (512y: 0) (512z: 0) + 19,080,957,547 cycles # 2.906 GHz + 58,959,648,789 instructions # 3.09 insn per cycle + 6.566573323 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412998e+00 -Avg ME (F77/C++) = 1.4129978146120550 -Relative difference = 1.3120184529301602e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129858051842916 +Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.072396e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.089312e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 1.089312e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.553153 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.204155e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.352745e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.352745e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.018056 sec INFO: No Floating Point Exceptions have been reported - 5,434,417,773 cycles:u # 3.491 GHz (74.87%) - 2,070,185 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.82%) - 1,714,095,174 stalled-cycles-backend:u # 31.54% backend cycles idle (74.82%) - 16,105,306,032 instructions:u # 2.96 insn per cycle - # 0.11 stalled cycles per insn (74.82%) - 1.559534972 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5205) (avx2: 0) (512y: 0) (512z: 0) + 5,861,245,947 cycles # 2.899 GHz + 16,693,370,121 instructions # 2.85 insn per cycle + 2.022246601 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129857118325333 -Relative difference = 2.039421953066926e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412987e+00 +Avg ME (F77/C++) = 1.4129865669244737 +Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.374359e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.455254e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.455254e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.715090 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.747206e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.811751e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.811751e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.959718 sec INFO: No Floating Point Exceptions have been reported - 2,493,118,731 cycles:u # 3.470 GHz (74.40%) - 1,981,459 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.58%) - 803,670,261 stalled-cycles-backend:u # 32.24% backend cycles idle (75.14%) - 6,030,919,708 instructions:u # 2.42 insn per cycle - # 0.13 stalled cycles per insn (75.51%) - 0.721282507 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4878) (512y: 0) (512z: 0) + 2,597,973,759 cycles # 2.697 GHz + 5,979,816,432 instructions # 2.30 insn per cycle + 0.963957244 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162680784324 -Relative difference = 1.896804623606238e-07 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.928786e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.008064e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.008064e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.871454 sec +INFO: No Floating Point Exceptions have 
been reported + 2,346,801,151 cycles # 2.682 GHz + 5,601,970,539 instructions # 2.39 insn per cycle + 0.875813732 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.412327e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.455439e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.455439e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) 
GeV^-2 +TOTAL : 1.184240 sec +INFO: No Floating Point Exceptions have been reported + 2,059,493,323 cycles # 1.734 GHz + 3,333,364,881 instructions # 1.62 insn per cycle + 1.188531798 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index b4c1f4a171..d735dc5897 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_16:27:56 +DATE: 2024-05-16_15:02:06 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.309309e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.780084e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.780084e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.755516e+02 +- 2.671055e+02 ) GeV^-2 -TOTAL : 0.496803 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.750186e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.085490e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.085490e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 +TOTAL : 0.469338 sec INFO: No Floating Point Exceptions have been reported - 1,438,489,170 cycles:u # 2.782 GHz (75.08%) - 6,397,869 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.57%) - 286,798,330 stalled-cycles-backend:u # 19.94% backend cycles idle (74.51%) - 1,895,246,735 instructions:u # 1.32 insn per cycle - # 0.15 stalled cycles per insn (75.47%) - 0.542440996 seconds time elapsed + 1,918,362,944 cycles # 2.804 GHz + 2,834,169,916 instructions # 1.48 insn per cycle + 0.742178075 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.138496e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.474548e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.474548e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.855934e+03 +- 1.791981e+03 ) GeV^-2 -TOTAL : 1.066962 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.524122e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.570005e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.570005e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 +TOTAL : 0.651816 sec INFO: No Floating Point Exceptions have been reported - 3,242,368,022 cycles:u # 2.975 GHz (75.14%) - 16,804,648 stalled-cycles-frontend:u # 0.52% frontend cycles idle (75.06%) - 861,623,771 stalled-cycles-backend:u # 26.57% backend cycles idle (75.25%) - 3,452,903,859 instructions:u # 1.06 insn per cycle - # 0.25 stalled cycles per insn (75.25%) - 1.137885372 seconds time elapsed + 2,503,160,784 cycles # 2.822 GHz + 3,832,792,162 instructions # 1.53 insn per cycle + 0.943470239 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.412404e+00 -Avg ME (F77/GPU) = 1.4131669530965212 -Relative difference = 0.0005401804983001964 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412608e+00 +Avg ME (F77/GPU) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.209957e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.225050e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.225050e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 5.132600 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.465694e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.479110e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.479110e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.671662 sec INFO: No Floating Point Exceptions have been reported - 17,861,632,443 cycles:u # 3.477 GHz (74.93%) - 2,252,442 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.94%) - 3,175,691,457 stalled-cycles-backend:u # 17.78% backend cycles idle (75.02%) - 55,190,690,458 instructions:u # 3.09 insn per cycle - # 0.06 stalled cycles per insn (75.08%) - 5.145976347 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1229) (avx2: 0) (512y: 0) (512z: 0) + 19,108,337,453 cycles # 2.863 GHz + 58,967,331,894 instructions # 3.09 insn per cycle + 6.675976597 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412998e+00 -Avg ME (F77/C++) = 1.4129978146120550 -Relative difference = 1.3120184529301602e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129858051842916 +Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.076539e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.093727e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.093727e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.550391 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.093089e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.238027e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.238027e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.051178 sec INFO: No Floating Point Exceptions have been reported - 5,413,097,028 cycles:u # 3.484 GHz (74.83%) - 2,034,925 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.78%) - 1,689,135,957 stalled-cycles-backend:u # 31.20% backend cycles idle (74.77%) - 16,170,572,616 instructions:u # 2.99 insn per cycle - # 0.10 stalled cycles per insn (74.93%) - 1.557344480 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5205) (avx2: 0) (512y: 0) (512z: 0) + 5,880,119,320 cycles # 2.862 GHz + 16,741,679,626 instructions # 2.85 insn per cycle + 2.055508197 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5765) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129857118325333 -Relative difference = 2.039421953066926e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412987e+00 +Avg ME (F77/C++) = 1.4129865669244737 +Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.363212e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.443546e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.443546e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.721122 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.718905e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.782305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.782305e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.980043 sec INFO: No Floating Point Exceptions have been reported - 2,497,750,595 cycles:u # 3.442 GHz (74.65%) - 1,839,004 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.64%) - 812,027,393 stalled-cycles-backend:u # 32.51% backend cycles idle (74.81%) - 6,085,428,374 instructions:u # 2.44 insn per cycle - # 0.13 stalled cycles per insn (75.36%) - 0.746413862 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4878) (512y: 0) (512z: 0) + 2,616,418,693 cycles # 2.660 GHz + 6,017,096,104 instructions # 2.30 insn per cycle + 0.984343134 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162680784324 -Relative difference = 1.896804623606238e-07 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.912882e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.991175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.991175e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.883189 sec +INFO: No Floating Point Exceptions have been reported + 2,365,822,002 cycles # 2.667 GHz + 5,638,771,692 instructions # 2.38 insn per cycle + 0.887626463 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.399129e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.441231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.441231e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.200076 sec +INFO: No Floating Point Exceptions have been reported + 2,081,452,605 cycles # 1.729 GHz + 3,374,965,036 instructions # 1.62 insn per cycle + 1.204429196 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2144) (512y: 39) (512z: 3675) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index b38fc5c0b0..3d41e21b12 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:58:29 +DATE: 2024-05-16_14:38:31 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.319448e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.870912e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.912701e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.415474e+04 +- 1.288238e+04 ) GeV^-2 -TOTAL : 0.332063 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.548366e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.290418e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.382374e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 
5.002447e+01 ) GeV^-2 +TOTAL : 0.453301 sec INFO: No Floating Point Exceptions have been reported - 820,248,244 cycles:u # 2.375 GHz (74.42%) - 2,123,048 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.55%) - 5,486,256 stalled-cycles-backend:u # 0.67% backend cycles idle (74.45%) - 1,344,668,467 instructions:u # 1.64 insn per cycle - # 0.00 stalled cycles per insn (74.74%) - 0.378354518 seconds time elapsed + 1,884,361,235 cycles # 2.811 GHz + 2,662,129,036 instructions # 1.41 insn per cycle + 0.727401829 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.406311e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.691504e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.696906e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.619620e+05 +- 1.611328e+05 ) GeV^-2 -TOTAL : 0.433658 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.381856e+07 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.386346e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.451907e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 +TOTAL : 0.498921 sec INFO: No Floating Point Exceptions have been reported - 1,194,190,695 cycles:u # 2.611 GHz (74.01%) - 2,261,387 stalled-cycles-frontend:u # 0.19% frontend cycles idle (73.79%) - 4,605,825 stalled-cycles-backend:u # 0.39% backend cycles idle (74.65%) - 1,598,291,024 instructions:u # 1.34 insn per cycle - # 0.00 stalled cycles per insn (76.16%) - 0.511043637 seconds time elapsed + 2,065,776,106 cycles # 2.820 GHz + 3,002,526,593 instructions # 1.45 insn per cycle + 0.789720140 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.412404e+00 -Avg ME (F77/GPU) = 1.4131669531526541 -Relative difference = 0.0005401805380429868 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412608e+00 +Avg ME (F77/GPU) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.257194e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.272653e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.272653e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 5.057627 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.479714e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.492704e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.492704e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.641350 sec INFO: No Floating Point Exceptions have been reported - 17,743,836,861 cycles:u # 3.506 GHz (74.94%) - 2,164,346 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) - 2,715,044,606 stalled-cycles-backend:u # 15.30% backend cycles idle (75.03%) - 54,919,157,247 instructions:u # 3.10 insn per cycle - # 0.05 stalled cycles per insn (75.03%) - 5.067815773 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1171) (avx2: 0) (512y: 0) (512z: 0) + 18,978,826,784 cycles # 2.861 GHz + 58,704,221,037 instructions # 3.09 insn per cycle + 6.645410970 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412998e+00 -Avg ME (F77/C++) = 1.4129978146120550 -Relative difference = 1.3120184529301602e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129858051842916 +Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.111347e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.129510e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 1.129510e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.499352 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.494310e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.651898e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.651898e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.950028 sec INFO: No Floating Point Exceptions have been reported - 5,239,374,949 cycles:u # 3.486 GHz (75.02%) - 1,722,926 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.98%) - 1,316,643,893 stalled-cycles-backend:u # 25.13% backend cycles idle (74.98%) - 16,173,448,663 instructions:u # 3.09 insn per cycle - # 0.08 stalled cycles per insn (74.98%) - 1.506027235 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5136) (avx2: 0) (512y: 0) (512z: 0) + 5,589,974,968 cycles # 2.862 GHz + 16,510,304,699 instructions # 2.95 insn per cycle + 1.954264273 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5551) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129857712652836 -Relative difference = 1.618803841657786e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412987e+00 +Avg ME (F77/C++) = 1.4129865669244737 +Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.125955e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.190690e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.190690e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.795721 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.496639e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.543532e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.543532e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 1.116418 sec INFO: No Floating Point Exceptions have been reported - 2,773,616,938 cycles:u # 3.471 GHz (75.03%) - 2,135,374 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.98%) - 783,207,993 stalled-cycles-backend:u # 28.24% backend cycles idle (74.98%) - 6,663,402,965 instructions:u # 2.40 insn per cycle - # 0.12 stalled cycles per insn (74.98%) - 0.802305848 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5430) (512y: 0) (512z: 0) + 2,975,820,242 cycles # 2.657 GHz + 6,633,799,194 instructions # 2.23 insn per cycle + 1.120575232 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5568) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162680784324 -Relative difference = 1.896804623606238e-07 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.615016e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.669374e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.669374e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 1.036246 sec +INFO: No Floating Point Exceptions have 
been reported + 2,759,204,529 cycles # 2.654 GHz + 6,255,102,481 instructions # 2.27 insn per cycle + 1.040401186 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5279) (512y: 25) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.286831e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322123e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322123e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) 
GeV^-2 +TOTAL : 1.300128 sec +INFO: No Floating Point Exceptions have been reported + 2,231,395,652 cycles # 1.715 GHz + 3,699,704,768 instructions # 1.66 insn per cycle + 1.304305216 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2391) (512y: 29) (512z: 3970) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 5a2a58cd8f..18990368c8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:58:43 +DATE: 2024-05-16_14:38:52 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.880481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.022557e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.026202e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.389904 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.980776e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.047318e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059891e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 
5.001681e+01 ) GeV^-2 +TOTAL : 0.469526 sec INFO: No Floating Point Exceptions have been reported - 1,065,922,222 cycles:u # 2.583 GHz (72.81%) - 2,232,390 stalled-cycles-frontend:u # 0.21% frontend cycles idle (73.88%) - 5,061,186 stalled-cycles-backend:u # 0.47% backend cycles idle (75.21%) - 1,576,301,383 instructions:u # 1.48 insn per cycle - # 0.00 stalled cycles per insn (74.83%) - 0.433335259 seconds time elapsed + 1,950,532,568 cycles # 2.815 GHz + 2,802,706,395 instructions # 1.44 insn per cycle + 0.749158155 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.584651e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.841453e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.847217e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.617205 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.120585e+07 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.317479e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329114e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.606344 sec INFO: No Floating Point Exceptions have been reported - 1,729,612,449 cycles:u # 2.709 GHz (74.43%) - 2,130,932 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.07%) - 5,626,909 stalled-cycles-backend:u # 0.33% backend cycles idle (75.07%) - 2,018,755,349 instructions:u # 1.17 insn per cycle - # 0.00 stalled cycles per insn (74.45%) - 0.665623654 seconds time elapsed + 2,403,151,636 cycles # 2.824 GHz + 3,669,339,361 instructions # 1.53 insn per cycle + 0.910110717 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569483 -Relative difference = 4.4188898885662695e-07 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads 
/ `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.886072e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.897917e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.897917e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.705075 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.348054e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.359694e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359694e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 7.005029 sec INFO: No Floating Point Exceptions have been reported - 19,987,696,758 cycles:u # 3.501 GHz (74.93%) - 2,586,000 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.91%) - 3,855,146,728 stalled-cycles-backend:u # 19.29% backend cycles idle (74.98%) - 59,054,891,649 instructions:u # 2.95 insn per cycle - # 0.07 stalled cycles per insn (75.06%) - 5.711965398 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1149) (avx2: 0) (512y: 0) (512z: 0) + 20,055,951,018 cycles # 2.863 GHz + 60,536,467,053 instructions # 3.02 insn per cycle + 7.009312607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.068777e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.120306e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.120306e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.727293 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.638770e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.684822e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.684822e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.556206 sec INFO: No Floating Point Exceptions have been reported - 9,538,124,101 cycles:u # 3.493 GHz (74.79%) - 1,850,579 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.93%) - 2,355,440,883 stalled-cycles-backend:u # 24.70% backend cycles idle (75.10%) - 29,650,788,765 instructions:u # 3.11 insn per cycle - # 0.08 stalled cycles per insn (75.10%) - 2.734010476 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4873) (avx2: 0) (512y: 0) (512z: 0) + 10,186,602,629 cycles # 2.862 GHz + 30,386,009,701 instructions # 2.98 insn per cycle + 3.560429335 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5280) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.251088e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.272896e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.272896e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.337074 sec +OMP threads 
/ `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.050822e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.223334e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.223334e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.833811 sec INFO: No Floating Point Exceptions have been reported - 4,669,732,847 cycles:u # 3.482 GHz (74.96%) - 2,112,477 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.95%) - 1,554,542,175 stalled-cycles-backend:u # 33.29% backend cycles idle (74.95%) - 11,137,824,946 instructions:u # 2.39 insn per cycle - # 0.14 stalled cycles per insn (74.95%) - 1.351531421 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4581) (512y: 0) (512z: 0) + 4,877,548,863 cycles # 2.655 GHz + 10,978,535,397 instructions # 2.25 insn per cycle + 1.838126466 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4624) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.034701e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.056812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056812e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.607005 sec +INFO: No Floating Point Exceptions have been reported + 4,285,859,041 cycles # 2.661 GHz + 10,248,085,853 instructions # 2.39 
insn per cycle + 1.611327735 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4280) (512y: 82) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.675038e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.769490e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.769490e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.480681 sec +INFO: No Floating Point Exceptions have been reported + 
4,211,204,679 cycles # 1.695 GHz + 6,044,041,090 instructions # 1.44 insn per cycle + 2.485018889 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2066) (512y: 117) (512z: 3540) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213786174055 +Relative difference = 4.3972324717191576e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 38bc847e2e..bea6b18082 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing 
to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-05-16_15:58:59 +DATE: 2024-05-16_14:39:18 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.388163e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.018679e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.022326e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.505575 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.940348e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.041869e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.054764e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 
5.001681e+01 ) GeV^-2 +TOTAL : 0.469154 sec INFO: No Floating Point Exceptions have been reported - 1,071,722,011 cycles:u # 2.592 GHz (73.97%) - 2,181,864 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.20%) - 5,154,232 stalled-cycles-backend:u # 0.48% backend cycles idle (75.03%) - 1,552,348,456 instructions:u # 1.45 insn per cycle - # 0.00 stalled cycles per insn (74.68%) - 0.551434612 seconds time elapsed + 1,946,414,728 cycles # 2.818 GHz + 2,803,423,086 instructions # 1.44 insn per cycle + 0.748059256 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.474005e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.834867e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.839980e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.669724 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.116866e+07 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.312173e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.323463e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.604157 sec INFO: No Floating Point Exceptions have been reported - 1,712,467,594 cycles:u # 2.689 GHz (74.82%) - 2,209,591 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.90%) - 5,605,006 stalled-cycles-backend:u # 0.33% backend cycles idle (75.17%) - 2,023,028,753 instructions:u # 1.18 insn per cycle - # 0.00 stalled cycles per insn (75.14%) - 0.721236809 seconds time elapsed + 2,374,249,289 cycles # 2.818 GHz + 3,602,148,119 instructions # 1.52 insn per cycle + 0.902621411 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569483 -Relative difference = 4.4188898885662695e-07 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads 
/ `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.908068e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.920040e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.920040e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.661916 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.368504e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.380280e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.380280e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.941323 sec INFO: No Floating Point Exceptions have been reported - 19,854,557,786 cycles:u # 3.504 GHz (75.00%) - 2,361,768 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 3,900,457,692 stalled-cycles-backend:u # 19.65% backend cycles idle (75.01%) - 58,617,210,039 instructions:u # 2.95 insn per cycle - # 0.07 stalled cycles per insn (75.01%) - 5.670181282 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1026) (avx2: 0) (512y: 0) (512z: 0) + 19,878,296,626 cycles # 2.863 GHz + 59,936,362,271 instructions # 3.02 insn per cycle + 6.945573140 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.270664e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.325536e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.325536e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.640204 sec +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.689994e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.736297e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.736297e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.516340 sec INFO: No Floating Point Exceptions have been reported - 9,230,702,509 cycles:u # 3.491 GHz (74.89%) - 2,254,879 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.89%) - 1,809,915,298 stalled-cycles-backend:u # 19.61% backend cycles idle (74.90%) - 30,150,647,518 instructions:u # 3.27 insn per cycle - # 0.06 stalled cycles per insn (75.02%) - 2.654880754 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4944) (avx2: 0) (512y: 0) (512z: 0) + 10,077,314,757 cycles # 2.863 GHz + 30,098,117,657 instructions # 2.99 insn per cycle + 3.520635536 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5082) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.225853e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.246775e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.246775e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.363889 sec +OMP threads 
/ `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.778247e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.940877e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.940877e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.889938 sec INFO: No Floating Point Exceptions have been reported - 4,768,365,786 cycles:u # 3.487 GHz (74.89%) - 2,082,915 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.85%) - 1,538,018,459 stalled-cycles-backend:u # 32.25% backend cycles idle (74.85%) - 11,628,498,602 instructions:u # 2.44 insn per cycle - # 0.13 stalled cycles per insn (74.85%) - 1.370276155 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4685) (512y: 0) (512z: 0) + 5,023,754,472 cycles # 2.654 GHz + 11,483,522,538 instructions # 2.29 insn per cycle + 1.894205310 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4723) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.644687e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.842226e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.842226e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.722080 sec +INFO: No Floating Point Exceptions have been reported + 4,590,091,342 cycles # 2.660 GHz + 10,809,457,257 instructions # 2.35 
insn per cycle + 1.726406566 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4285) (512y: 234) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.641517e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.735645e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.735645e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.492729 sec +INFO: No Floating Point Exceptions have been reported + 
4,229,101,372 cycles # 1.695 GHz + 6,273,394,761 instructions # 1.48 insn per cycle + 2.496999493 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1961) (512y: 163) (512z: 3617) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213786174055 +Relative difference = 4.3972324717191576e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index f745f0bfe4..adf6424639 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:59:15 +DATE: 2024-05-16_14:39:44 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.240694e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.470445e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.471175e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.647569 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.453895e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.477096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.479397e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 
+- 2.368970e+00 ) GeV^-4 +TOTAL : 0.529744 sec INFO: No Floating Point Exceptions have been reported - 1,917,427,552 cycles:u # 2.918 GHz (74.21%) - 2,068,160 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.28%) - 5,748,928 stalled-cycles-backend:u # 0.30% backend cycles idle (75.52%) - 2,099,178,567 instructions:u # 1.09 insn per cycle - # 0.00 stalled cycles per insn (75.85%) - 0.694887363 seconds time elapsed + 2,179,317,048 cycles # 2.822 GHz + 3,403,036,461 instructions # 1.56 insn per cycle + 0.830470867 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.242927e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.245498e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.245555e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 7.695051 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.124157e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.151338e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.152519e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.042150 sec INFO: No Floating Point Exceptions have been reported - 26,568,387,560 cycles:u # 3.439 GHz (74.88%) - 3,047,402 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 5,977,774 stalled-cycles-backend:u # 0.02% backend cycles idle (75.05%) - 21,140,043,277 instructions:u # 0.80 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 7.749841650 seconds time elapsed + 9,405,604,432 cycles # 2.853 GHz + 20,118,562,201 instructions # 2.14 insn per cycle + 3.353608047 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.959223e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.960008e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.960008e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.384500 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.820592e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.821434e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821434e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 9.018372 sec INFO: No Floating Point Exceptions have been reported - 25,871,962,772 cycles:u # 3.084 GHz (74.96%) - 5,483,137 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) - 3,011,151,668 stalled-cycles-backend:u # 11.64% backend cycles idle (75.02%) - 81,653,061,558 instructions:u # 3.16 insn per cycle - # 0.04 stalled cycles per insn (75.02%) - 8.392660557 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) + 25,614,013,948 cycles # 2.839 GHz + 78,938,013,495 instructions # 3.08 insn per cycle + 9.022664733 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.436753e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.440754e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.440754e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
3.710024 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.519494e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.522699e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.522699e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.669138 sec INFO: No Floating Point Exceptions have been reported - 11,426,095,552 cycles:u # 3.076 GHz (75.02%) - 766,709 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.02%) - 1,377,794,981 stalled-cycles-backend:u # 12.06% backend cycles idle (75.02%) - 39,154,218,221 instructions:u # 3.43 insn per cycle - # 0.04 stalled cycles per insn (75.02%) - 3.718815324 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) + 12,898,966,245 cycles # 2.761 GHz + 39,280,150,365 instructions # 3.05 insn per cycle + 4.673492352 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.058588e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.060862e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.060862e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
1.562917 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.859599e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.875346e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.875346e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.097013 sec INFO: No Floating Point Exceptions have been reported - 4,813,152,911 cycles:u # 3.071 GHz (74.86%) - 414,825 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 543,029,817 stalled-cycles-backend:u # 11.28% backend cycles idle (74.99%) - 13,718,682,409 instructions:u # 2.85 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 1.570777631 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) + 5,574,685,577 cycles # 2.655 GHz + 13,685,856,406 instructions # 2.46 insn per cycle + 2.101249976 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.915800e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.935807e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.935807e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.848754 sec +INFO: No Floating 
Point Exceptions have been reported + 4,887,101,603 cycles # 2.639 GHz + 12,341,123,817 instructions # 2.53 insn per cycle + 1.853060894 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.728417e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.739729e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.739729e+03 ) sec^-1 
+MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.446881 sec +INFO: No Floating Point Exceptions have been reported + 4,107,098,137 cycles # 1.676 GHz + 6,336,202,498 instructions # 1.54 insn per cycle + 2.451096147 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 2ac03592db..92636e2555 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:28:19 +DATE: 2024-05-16_15:02:53 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.299483e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.450217e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.450217e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.698110 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.094987e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.434034e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.434034e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.523594 sec INFO: No Floating Point Exceptions have been reported - 1,943,776,735 cycles:u # 2.766 GHz (74.48%) - 2,561,579 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.45%) - 48,587,924 stalled-cycles-backend:u # 2.50% backend cycles idle (75.62%) - 2,213,319,398 instructions:u # 1.14 insn per cycle - # 0.02 stalled cycles per insn (75.15%) - 0.744784576 seconds time elapsed + 2,118,517,608 cycles # 2.813 GHz + 3,348,276,596 instructions # 1.58 insn per cycle + 0.813391390 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.205592e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.241191e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.241191e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 8.610558 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.622834e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.121853e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.121853e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.329713 sec INFO: No Floating Point Exceptions have been reported - 28,067,417,419 cycles:u # 3.264 GHz (74.91%) - 21,383,596 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.93%) - 1,133,454,846 stalled-cycles-backend:u # 4.04% backend cycles idle (74.97%) - 22,563,060,022 instructions:u # 0.80 insn per cycle - # 0.05 stalled cycles per insn (75.02%) - 8.675229690 seconds time elapsed + 10,291,111,145 cycles # 2.854 GHz + 21,714,903,322 instructions # 2.11 insn per cycle + 3.660758937 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.160540e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.161393e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.161393e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.606452 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.836126e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.837051e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.837051e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.946025 sec INFO: No Floating Point Exceptions have been reported - 26,144,739,034 cycles:u # 3.435 GHz (74.99%) - 24,042,283 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.99%) - 3,442,166,079 stalled-cycles-backend:u # 13.17% backend cycles idle (74.99%) - 81,651,638,110 instructions:u # 3.12 insn per cycle - # 0.04 stalled cycles per insn (74.98%) - 7.622588272 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) + 25,625,027,072 cycles # 2.863 GHz + 78,943,584,564 instructions # 3.08 insn per cycle + 8.950491990 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.017452e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.022007e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.022007e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.284283 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.512313e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.515690e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.515690e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.682891 sec INFO: No Floating Point Exceptions have been reported - 11,484,312,105 cycles:u # 3.493 GHz (74.95%) - 847,543 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.94%) - 1,391,797,034 stalled-cycles-backend:u # 12.12% backend cycles idle (74.94%) - 39,232,489,464 instructions:u # 3.42 insn per cycle - # 0.04 stalled cycles per insn (74.94%) - 3.291507424 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) + 12,903,818,271 cycles # 2.754 GHz + 39,293,324,950 instructions # 3.05 insn per cycle + 4.687529036 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.197643e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.200220e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.200220e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.384891 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.867831e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.884189e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.884189e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.098804 sec INFO: No Floating Point Exceptions have been reported - 4,845,546,394 cycles:u # 3.488 GHz (74.68%) - 555,916 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.73%) - 601,492,271 stalled-cycles-backend:u # 12.41% backend cycles idle (75.02%) - 13,720,833,344 instructions:u # 2.83 insn per cycle - # 0.04 stalled cycles per insn (75.24%) - 1.392181254 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) + 5,587,651,201 cycles # 2.658 GHz + 13,696,262,775 instructions # 2.45 insn per cycle + 2.103410758 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.952196e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.973818e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.973818e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.845912 sec +INFO: No Floating Point Exceptions have been reported + 4,903,860,646 cycles # 2.651 GHz + 12,352,108,328 instructions # 2.52 insn per cycle + 1.850421022 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.711524e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.723541e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.723541e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.457485 sec +INFO: No Floating Point Exceptions have been reported + 4,130,677,154 cycles # 1.678 GHz + 6,346,127,118 instructions # 1.54 insn per cycle + 2.462055019 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index b1a908cb26..07bc3b6c73 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:34:45 +DATE: 2024-05-16_15:12:42 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.316994e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.477450e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.478126e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.490501e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.518177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.520849e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.658412 sec +TOTAL : 0.515008 
sec INFO: No Floating Point Exceptions have been reported - 1,952,426,785 cycles:u # 2.906 GHz (74.78%) - 2,611,996 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.90%) - 49,330,094 stalled-cycles-backend:u # 2.53% backend cycles idle (75.08%) - 2,198,477,186 instructions:u # 1.13 insn per cycle - # 0.02 stalled cycles per insn (75.14%) - 0.699140909 seconds time elapsed + 2,117,861,647 cycles # 2.847 GHz + 3,355,581,223 instructions # 1.58 insn per cycle + 0.805282012 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.238511e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.241352e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.241408e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.120060e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.152876e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154244e+05 ) 
sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 8.406942 sec +TOTAL : 3.146209 sec INFO: No Floating Point Exceptions have been reported - 28,876,407,216 cycles:u # 3.423 GHz (74.90%) - 11,676,178 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.97%) - 1,120,027,574 stalled-cycles-backend:u # 3.88% backend cycles idle (75.01%) - 22,611,604,116 instructions:u # 0.78 insn per cycle - # 0.05 stalled cycles per insn (75.02%) - 8.461854871 seconds time elapsed + 9,794,350,225 cycles # 2.878 GHz + 20,567,996,876 instructions # 2.10 insn per cycle + 3.458179285 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.189529e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.190387e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.190387e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.854249e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.855163e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.855163e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.502176 sec +TOTAL : 8.856220 sec INFO: No Floating Point Exceptions have been reported - 26,067,483,019 cycles:u # 3.474 GHz (74.95%) - 24,047,816 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.95%) - 3,393,470,564 stalled-cycles-backend:u # 13.02% backend cycles idle (74.98%) - 81,696,784,837 instructions:u # 3.13 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 7.513935591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) + 25,606,958,110 cycles # 2.890 GHz + 78,936,876,492 
instructions # 3.08 insn per cycle + 8.860490718 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.016907e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.021312e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.021312e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.547585e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.550823e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.550823e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.280765 sec +TOTAL : 4.633866 sec INFO: No Floating Point Exceptions have been reported - 11,488,529,274 cycles:u # 3.499 GHz (74.91%) - 695,887 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.91%) - 1,564,018,994 stalled-cycles-backend:u # 13.61% backend cycles idle (74.91%) - 39,228,204,285 instructions:u # 3.41 insn per cycle - # 0.04 stalled cycles per insn (74.95%) - 3.284992005 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) + 12,886,616,952 cycles # 2.779 GHz + 39,279,548,039 instructions # 3.05 insn per cycle + 4.638052623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.179905e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.182395e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.182395e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.950793e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
7.966539e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.966539e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.401951 sec +TOTAL : 2.073967 sec INFO: No Floating Point Exceptions have been reported - 4,819,349,124 cycles:u # 3.431 GHz (74.95%) - 451,954 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.94%) - 596,939,385 stalled-cycles-backend:u # 12.39% backend cycles idle (74.94%) - 13,715,502,656 instructions:u # 2.85 insn per cycle - # 0.04 stalled cycles per insn (74.94%) - 1.406063146 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) + 5,577,712,569 cycles # 2.685 GHz + 13,684,498,611 instructions # 2.45 insn per cycle + 2.078154877 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.068596e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.089664e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.089664e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.820316 sec +INFO: No 
Floating Point Exceptions have been reported + 4,894,997,970 cycles # 2.684 GHz + 12,339,079,686 instructions # 2.52 insn per cycle + 1.824557454 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.817590e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.829323e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.829323e+03 ) sec^-1 
+MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.417091 sec +INFO: No Floating Point Exceptions have been reported + 4,131,104,953 cycles # 1.707 GHz + 6,332,486,091 instructions # 1.53 insn per cycle + 2.421265188 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index a0e80c25fb..b300efd9c0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,195 +1,236 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:32:35 +DATE: 2024-05-16_15:07:12 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.344131e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.490608e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.491379e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.681519 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.175456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.487401e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489887e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.516778 sec INFO: No Floating Point Exceptions have been reported - 1,974,279,625 cycles:u # 2.884 GHz (73.78%) - 2,954,441 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.98%) - 35,227,763 stalled-cycles-backend:u # 1.78% backend cycles idle (75.37%) - 2,180,318,281 instructions:u # 1.10 insn per cycle - # 0.02 stalled cycles per insn (75.43%) - 0.724160088 seconds time elapsed + 2,106,838,284 cycles # 2.817 GHz + 3,334,047,065 instructions # 1.58 insn per cycle + 0.806903831 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.212854e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.246626e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.246682e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 8.555132 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.725415e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.181222e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.182613e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.208931 sec INFO: No Floating Point Exceptions have been reported - 28,616,243,213 cycles:u # 3.340 GHz (74.87%) - 23,129,446 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.95%) - 1,128,438,495 stalled-cycles-backend:u # 3.94% backend cycles idle (75.04%) - 22,952,283,801 instructions:u # 0.80 insn per cycle - # 0.05 stalled cycles per insn (75.08%) - 8.616154337 seconds time elapsed + 9,884,616,856 cycles # 2.852 GHz + 22,569,706,597 instructions # 2.28 insn per cycle + 3.521271497 seconds time elapsed 
------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.131810e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.132645e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.132645e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.706205 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.838805e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.839662e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.839662e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.928794 sec INFO: No Floating Point Exceptions have been reported - 26,078,373,962 cycles:u # 3.383 GHz (74.99%) - 25,079,225 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.99%) - 3,510,795,188 stalled-cycles-backend:u # 13.46% backend cycles idle (74.99%) - 81,685,715,238 instructions:u # 3.13 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 7.710674176 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 6614) (avx2: 0) (512y: 0) (512z: 0) + 25,578,535,475 cycles # 2.864 GHz + 78,941,438,017 instructions # 3.09 insn per cycle + 8.932959256 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4893) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.865797e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.870539e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.870539e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) 
GeV^-4 -TOTAL : 3.383410 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.490750e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.493870e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.493870e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.708141 sec INFO: No Floating Point Exceptions have been reported - 11,476,947,568 cycles:u # 3.389 GHz (74.97%) - 717,345 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) - 1,394,339,634 stalled-cycles-backend:u # 12.15% backend cycles idle (74.96%) - 39,175,337,651 instructions:u # 3.41 insn per cycle - # 0.04 stalled cycles per insn (74.96%) - 3.387986892 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12814) (avx2: 0) (512y: 0) (512z: 0) + 12,873,433,154 cycles # 2.733 GHz + 39,280,620,994 instructions # 3.05 insn per cycle + 4.712353785 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.152744e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155057e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.155057e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) 
GeV^-4 -TOTAL : 1.434251 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.853620e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.869024e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.869024e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.097850 sec INFO: No Floating Point Exceptions have been reported - 4,860,085,854 cycles:u # 3.382 GHz (74.95%) - 414,269 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.95%) - 598,023,717 stalled-cycles-backend:u # 12.30% backend cycles idle (74.95%) - 13,704,018,164 instructions:u # 2.82 insn per cycle - # 0.04 stalled cycles per insn (74.95%) - 1.438315794 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11059) (512y: 0) (512z: 0) + 5,573,477,429 cycles # 2.652 GHz + 13,685,909,410 instructions # 2.46 insn per cycle + 2.102047066 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.970623e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.991129e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.991129e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.837793 sec +INFO: No 
Floating Point Exceptions have been reported + 4,885,535,539 cycles # 2.653 GHz + 12,340,762,979 instructions # 2.53 insn per cycle + 1.841998870 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.715803e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.727367e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.727367e+03 ) sec^-1 
+MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.451983 sec +INFO: No Floating Point Exceptions have been reported + 4,110,713,398 cycles # 1.674 GHz + 6,334,867,690 instructions # 1.54 insn per cycle + 2.456147392 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 98f971e8da..254c65fd8c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_15:59:44 +DATE: 2024-05-16_14:40:17 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.383801e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.441172e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.441561e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.581765 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.472040e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.495257e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497568e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 
+- 2.368970e+00 ) GeV^-4 +TOTAL : 0.530193 sec INFO: No Floating Point Exceptions have been reported - 1,420,717,109 cycles:u # 2.446 GHz (73.59%) - 2,253,860 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.62%) - 6,648,355 stalled-cycles-backend:u # 0.47% backend cycles idle (75.84%) - 1,777,674,227 instructions:u # 1.25 insn per cycle - # 0.00 stalled cycles per insn (74.83%) - 0.643584224 seconds time elapsed + 2,179,825,483 cycles # 2.820 GHz + 3,416,926,116 instructions # 1.57 insn per cycle + 0.832303660 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.739960e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.745030e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.745144e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.374862 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.149957e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.177471e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.178689e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.025570 sec INFO: No Floating Point Exceptions have been reported - 19,281,030,721 cycles:u # 3.009 GHz (74.90%) - 2,979,566 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.92%) - 5,145,908 stalled-cycles-backend:u # 0.03% backend cycles idle (74.99%) - 15,515,306,743 instructions:u # 0.80 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 6.432094557 seconds time elapsed + 9,343,829,120 cycles # 2.851 GHz + 20,017,847,921 instructions # 2.14 insn per cycle + 3.337093329 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.965819e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.966613e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.966613e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.355971 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.844549e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.845438e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.845438e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.900941 sec INFO: No Floating Point Exceptions have been reported - 25,786,448,242 cycles:u # 3.084 GHz (74.95%) - 1,280,141 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) - 2,966,784,019 stalled-cycles-backend:u # 11.51% backend cycles idle (75.03%) - 81,685,731,850 instructions:u # 3.17 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 8.364397457 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 6589) (avx2: 0) (512y: 0) (512z: 0) + 25,492,945,375 cycles # 2.863 GHz + 78,715,017,784 instructions # 3.09 insn per cycle + 8.905151100 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4264) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.039107e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.043696e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.043696e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
3.266114 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.432714e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.435728e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.435728e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.786568 sec INFO: No Floating Point Exceptions have been reported - 11,454,279,152 cycles:u # 3.503 GHz (74.91%) - 713,745 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.02%) - 1,542,827,344 stalled-cycles-backend:u # 13.47% backend cycles idle (75.05%) - 39,150,001,362 instructions:u # 3.42 insn per cycle - # 0.04 stalled cycles per insn (75.05%) - 3.272776435 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12771) (avx2: 0) (512y: 0) (512z: 0) + 12,968,671,480 cycles # 2.709 GHz + 39,227,279,421 instructions # 3.02 insn per cycle + 4.790848376 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12951) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.193623e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.196165e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.196165e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
1.385365 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.791500e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.806568e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.806568e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.114108 sec INFO: No Floating Point Exceptions have been reported - 4,853,097,891 cycles:u # 3.494 GHz (74.66%) - 2,985,761 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.84%) - 585,936,021 stalled-cycles-backend:u # 12.07% backend cycles idle (75.13%) - 13,737,946,664 instructions:u # 2.83 insn per cycle - # 0.04 stalled cycles per insn (75.23%) - 1.392136375 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11048) (512y: 0) (512z: 0) + 5,617,875,214 cycles # 2.653 GHz + 13,801,216,605 instructions # 2.46 insn per cycle + 2.118326582 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.808696e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.827867e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.827867e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.871093 sec +INFO: No Floating 
Point Exceptions have been reported + 4,977,184,975 cycles # 2.656 GHz + 12,467,160,434 instructions # 2.50 insn per cycle + 1.875328468 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.708154e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.719459e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.719459e+03 ) sec^-1 
+MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.454314 sec +INFO: No Floating Point Exceptions have been reported + 4,118,637,907 cycles # 1.676 GHz + 6,458,862,875 instructions # 1.57 insn per cycle + 2.458530246 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index c204930b95..452f4e853d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:15:57 +DATE: 2024-05-16_14:53:22 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.333504e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.473180e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.473971e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.641454 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.253411e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.278108e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.280152e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 
+- 2.368970e+00 ) GeV^-4 +TOTAL : 0.538797 sec INFO: No Floating Point Exceptions have been reported - 1,921,374,787 cycles:u # 2.944 GHz (74.31%) - 2,218,794 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.46%) - 5,689,437 stalled-cycles-backend:u # 0.30% backend cycles idle (75.40%) - 2,098,072,606 instructions:u # 1.09 insn per cycle - # 0.00 stalled cycles per insn (75.62%) - 0.686609136 seconds time elapsed + 2,198,780,840 cycles # 2.857 GHz + 3,392,092,682 instructions # 1.54 insn per cycle + 0.826434194 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.243251e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.246011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.246068e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 7.730002 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.756018e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.782691e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783822e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.316768 sec INFO: No Floating Point Exceptions have been reported - 26,412,911,991 cycles:u # 3.420 GHz (75.01%) - 3,115,612 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.04%) - 6,161,858 stalled-cycles-backend:u # 0.02% backend cycles idle (75.05%) - 21,073,639,603 instructions:u # 0.80 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 7.782665100 seconds time elapsed + 10,315,360,608 cycles # 2.881 GHz + 23,624,745,879 instructions # 2.29 insn per cycle + 3.638219909 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.500085e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.500454e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.500454e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 36.455677 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.179521e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.179964e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.179964e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 39.248920 sec INFO: No Floating Point Exceptions have been reported - 126,603,172,584 cycles:u # 3.473 GHz (74.99%) - 52,649,839 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.00%) - 16,830,397,272 stalled-cycles-backend:u # 13.29% backend cycles idle (75.01%) - 140,984,420,050 instructions:u # 1.11 insn per cycle - # 0.12 stalled cycles per insn (75.01%) - 36.462809036 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21092) (avx2: 0) (512y: 0) (512z: 0) + 113,511,319,041 cycles # 2.892 GHz + 144,820,446,927 instructions # 1.28 insn per cycle + 39.253177511 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21353) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198140461E-004 -Relative difference = 2.8372991790910424e-07 +Avg ME (F77/C++) = 6.6266731198140450E-004 +Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.620673e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.623024e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.623024e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.541767 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.047626e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.050057e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.050057e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.390362 sec INFO: No Floating Point Exceptions have been reported - 15,788,847,464 cycles:u # 3.474 GHz (75.01%) - 847,416 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 7,459,545,585 stalled-cycles-backend:u # 47.25% backend cycles idle (75.01%) - 37,454,674,896 instructions:u # 2.37 insn per cycle - # 0.20 stalled cycles per insn (75.01%) - 4.548435574 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68052) (avx2: 0) (512y: 0) (512z: 0) + 14,740,564,650 cycles # 2.733 GHz + 37,575,494,329 instructions # 2.55 insn per cycle + 5.394647902 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141220E-004 -Relative difference = 2.837299064562788e-07 +Avg ME (F77/C++) = 6.6266731198141209E-004 +Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.376401e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.385763e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
7.385763e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.236058 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.230737e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.243892e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.243892e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.278344 sec INFO: No Floating Point Exceptions have been reported - 7,694,540,236 cycles:u # 3.435 GHz (75.03%) - 4,856,635 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) - 4,403,069,372 stalled-cycles-backend:u # 57.22% backend cycles idle (75.00%) - 12,893,293,961 instructions:u # 1.68 insn per cycle - # 0.34 stalled cycles per insn (75.00%) - 2.242659070 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46593) (512y: 0) (512z: 0) + 6,134,003,628 cycles # 2.689 GHz + 13,061,930,844 instructions # 2.13 insn per cycle + 2.282738143 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156778E-004 -Relative difference = 2.837296716733571e-07 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.779670e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.799133e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.799133e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.877722 sec +INFO: No Floating 
Point Exceptions have been reported + 5,068,047,565 cycles # 2.694 GHz + 11,440,450,267 instructions # 2.26 insn per cycle + 1.882139324 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.093705e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.106755e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.106755e+03 ) sec^-1 
+MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.321771 sec +INFO: No Floating Point Exceptions have been reported + 3,974,444,581 cycles # 1.709 GHz + 5,942,873,144 instructions # 1.50 insn per cycle + 2.326156002 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 82dbeb63c7..00ea23e18d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:16:58 +DATE: 2024-05-16_14:54:29 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.381471e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.432629e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.432855e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.533772 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.259147e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.284136e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.286360e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 
+- 2.368970e+00 ) GeV^-4 +TOTAL : 0.536782 sec INFO: No Floating Point Exceptions have been reported - 1,495,230,532 cycles:u # 2.798 GHz (74.57%) - 2,178,277 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.79%) - 5,373,727 stalled-cycles-backend:u # 0.36% backend cycles idle (75.94%) - 1,821,435,799 instructions:u # 1.22 insn per cycle - # 0.00 stalled cycles per insn (76.03%) - 0.579795308 seconds time elapsed + 2,193,506,190 cycles # 2.857 GHz + 3,337,314,407 instructions # 1.52 insn per cycle + 0.824492176 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.738658e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.743473e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.743590e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.543267 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.761556e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.788263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.789425e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.301197 sec INFO: No Floating Point Exceptions have been reported - 21,689,463,641 cycles:u # 3.410 GHz (74.90%) - 2,829,483 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 5,900,164 stalled-cycles-backend:u # 0.03% backend cycles idle (74.97%) - 17,423,262,017 instructions:u # 0.80 insn per cycle - # 0.00 stalled cycles per insn (74.99%) - 6.597249694 seconds time elapsed + 10,264,886,616 cycles # 2.886 GHz + 23,377,018,059 instructions # 2.28 insn per cycle + 3.615104997 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.357215e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.357558e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.357558e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 37.650036 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.170908e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.171353e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.171353e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 39.330349 sec INFO: No Floating Point Exceptions have been reported - 128,096,240,554 cycles:u # 3.402 GHz (75.00%) - 161,153,961 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.99%) - 17,514,129,534 stalled-cycles-backend:u # 13.67% backend cycles idle (74.99%) - 141,505,351,633 instructions:u # 1.10 insn per cycle - # 0.12 stalled cycles per insn (74.99%) - 37.674452732 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21725) (avx2: 0) (512y: 0) (512z: 0) + 113,688,017,774 cycles # 2.891 GHz + 144,788,018,158 instructions # 1.27 insn per cycle + 39.334720458 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20719) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.399478e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.401773e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.401773e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
4.836726 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.974783e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.977013e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.977013e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.522774 sec INFO: No Floating Point Exceptions have been reported - 16,174,975,807 cycles:u # 3.342 GHz (74.92%) - 11,299,672 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.00%) - 6,032,416,709 stalled-cycles-backend:u # 37.29% backend cycles idle (75.05%) - 37,536,936,894 instructions:u # 2.32 insn per cycle - # 0.16 stalled cycles per insn (75.05%) - 4.844983395 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68056) (avx2: 0) (512y: 0) (512z: 0) + 15,220,566,650 cycles # 2.755 GHz + 37,763,046,074 instructions # 2.48 insn per cycle + 5.527045303 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141220E-004 -Relative difference = 2.837299064562788e-07 +Avg ME (F77/C++) = 6.6266731198141209E-004 +Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.532310e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.542495e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
7.542495e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.189187 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.412795e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.426610e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.426610e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.222010 sec INFO: No Floating Point Exceptions have been reported - 7,597,122,223 cycles:u # 3.464 GHz (74.83%) - 2,836,861 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.94%) - 4,216,614,115 stalled-cycles-backend:u # 55.50% backend cycles idle (75.12%) - 12,778,244,268 instructions:u # 1.68 insn per cycle - # 0.33 stalled cycles per insn (75.20%) - 2.196413871 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:45663) (512y: 0) (512z: 0) + 6,000,419,836 cycles # 2.696 GHz + 12,896,174,142 instructions # 2.15 insn per cycle + 2.226315650 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156778E-004 -Relative difference = 2.837296716733571e-07 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.743711e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.762861e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.762861e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.885038 sec +INFO: No Floating 
Point Exceptions have been reported + 5,086,798,971 cycles # 2.694 GHz + 11,447,968,989 instructions # 2.25 insn per cycle + 1.889284279 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.141072e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.153903e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.153903e+03 ) sec^-1 
+MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.306249 sec +INFO: No Floating Point Exceptions have been reported + 3,947,559,408 cycles # 1.709 GHz + 5,896,754,674 instructions # 1.49 insn per cycle + 2.310527958 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 988b54655f..15bbe59069 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:00:11 +DATE: 2024-05-16_14:40:50 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.455591e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.730172e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.730916e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535870e-02 +- 4.279978e-02 ) GeV^-4 -TOTAL : 0.461829 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.326887e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.370559e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.376371e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 
+- 2.368053e+00 ) GeV^-4 +TOTAL : 0.487782 sec INFO: No Floating Point Exceptions have been reported - 1,181,103,355 cycles:u # 2.678 GHz (72.77%) - 2,044,415 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.83%) - 5,372,072 stalled-cycles-backend:u # 0.45% backend cycles idle (75.49%) - 1,609,274,886 instructions:u # 1.36 insn per cycle - # 0.00 stalled cycles per insn (74.63%) - 0.543216903 seconds time elapsed + 1,984,813,926 cycles # 2.807 GHz + 2,933,686,219 instructions # 1.48 insn per cycle + 0.764328783 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.698964e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.725804e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.726247e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 -TOTAL : 2.611783 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.584549e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 8.644337e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.647136e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.720884 sec INFO: No Floating Point Exceptions have been reported - 8,754,892,668 cycles:u # 3.324 GHz (75.10%) - 2,394,917 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.04%) - 4,639,328 stalled-cycles-backend:u # 0.05% backend cycles idle (75.01%) - 7,446,007,909 instructions:u # 0.85 insn per cycle - # 0.00 stalled cycles per insn (74.91%) - 2.657197180 seconds time elapsed + 5,560,374,951 cycles # 2.849 GHz + 11,900,809,748 instructions # 2.14 insn per cycle + 2.008088048 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.469441e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.470531e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.470531e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.651593 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.909633e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910547e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910547e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.596251 sec INFO: No Floating Point Exceptions have been reported - 23,317,370,497 cycles:u # 3.504 GHz (74.98%) - 948,384 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.00%) - 3,079,063,251 stalled-cycles-backend:u # 13.21% backend cycles idle (75.00%) - 75,776,373,603 instructions:u # 3.25 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 6.660009095 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) + 24,624,004,022 cycles # 2.864 GHz + 78,129,381,217 instructions # 3.17 insn per cycle + 8.600293639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866115424713E-004 -Relative difference = 5.861309557415831e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.913769e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.931483e+03 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.931483e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.664463 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.891953e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.904635e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.904635e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.387612 sec INFO: No Floating Point Exceptions have been reported - 5,806,934,221 cycles:u # 3.482 GHz (75.06%) - 546,050 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.06%) - 816,845,040 stalled-cycles-backend:u # 14.07% backend cycles idle (75.06%) - 20,041,870,445 instructions:u # 3.45 insn per cycle - # 0.04 stalled cycles per insn (75.06%) - 1.670978010 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) + 6,469,659,104 cycles # 2.706 GHz + 20,120,611,338 instructions # 3.11 insn per cycle + 2.391816623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274845946848876E-004 -Relative difference = 6.115670001294808e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.367496e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.377823e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.377823e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.702661 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.562010e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.568248e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.568248e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.059004 sec INFO: No Floating Point Exceptions have been reported - 2,448,534,363 cycles:u # 3.468 GHz (75.07%) - 202,280 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.08%) - 246,454,849 stalled-cycles-backend:u # 10.07% backend cycles idle (75.08%) - 6,977,330,815 instructions:u # 2.85 insn per cycle - # 0.04 stalled cycles per insn (75.08%) - 0.708851823 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) + 2,818,181,262 cycles # 2.654 GHz + 6,988,460,270 instructions # 2.48 insn per cycle + 1.063195979 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271947045332125E-004 -Relative difference = 4.4583988847766445e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.763183e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.771185e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.771185e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL 
: 0.938662 sec +INFO: No Floating Point Exceptions have been reported + 2,488,393,509 cycles # 2.641 GHz + 6,295,244,635 instructions # 2.53 insn per cycle + 0.942828770 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.363218e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.368048e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.368048e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.211358 sec +INFO: No Floating Point Exceptions have been reported + 2,044,658,355 cycles # 1.683 GHz + 3,265,998,063 instructions # 1.60 insn per cycle + 1.215542758 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 031177f82a..e281ad389f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:28:47 +DATE: 2024-05-16_15:03:26 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.553549e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.723413e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.723413e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.202335e-01 +- 3.251521e-01 ) GeV^-4 -TOTAL : 0.448264 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.615502e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.322427e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.322427e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.477115 sec INFO: No Floating Point Exceptions have been reported - 1,251,529,402 cycles:u # 2.699 GHz (75.20%) - 2,893,169 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.15%) - 33,761,836 stalled-cycles-backend:u # 2.70% backend cycles idle (74.29%) - 1,611,617,832 instructions:u # 1.29 insn per cycle - # 0.02 stalled cycles per insn (75.70%) - 0.494487066 seconds time elapsed + 1,936,349,619 cycles # 2.809 GHz + 2,877,179,431 instructions # 1.49 insn per cycle + 0.747561501 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.265840e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.707070e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.707070e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.213799e+02 +- 1.195366e+02 ) GeV^-4 -TOTAL : 3.437893 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.243623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.556013e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.556013e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 +TOTAL : 1.907816 sec INFO: No Floating Point Exceptions have been reported - 11,545,486,584 cycles:u # 3.334 GHz (74.94%) - 21,685,421 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.85%) - 1,139,000,542 stalled-cycles-backend:u # 9.87% backend cycles idle (74.84%) - 9,911,182,338 instructions:u # 0.86 insn per cycle - # 0.11 stalled cycles per insn (75.10%) - 3.489454557 seconds time elapsed + 6,131,638,198 cycles # 2.845 GHz + 12,981,768,605 instructions # 2.12 insn per cycle + 2.213144159 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.456445e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.457487e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.457487e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.688589 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.909165e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910120e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910120e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.600727 sec INFO: No Floating Point Exceptions have been reported - 23,329,106,189 cycles:u # 3.486 GHz (74.99%) - 964,423 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.02%) - 3,034,499,541 stalled-cycles-backend:u # 13.01% backend cycles idle (75.02%) - 75,768,705,801 instructions:u # 3.25 insn per cycle - # 0.04 stalled cycles per insn (75.02%) - 6.696220927 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) + 24,637,778,479 cycles # 2.864 GHz + 78,132,610,249 instructions # 3.17 insn per cycle + 8.604942209 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866115424713E-004 -Relative difference = 5.861309557415831e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.914382e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.931837e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.931837e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.666359 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.457452e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.468775e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.468775e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.550311 sec INFO: No Floating Point Exceptions have been reported - 5,845,631,490 cycles:u # 3.501 GHz (74.83%) - 379,736 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.04%) - 808,475,687 stalled-cycles-backend:u # 13.83% backend cycles idle (75.09%) - 20,043,502,475 instructions:u # 3.43 insn per cycle - # 0.04 stalled cycles per insn (75.09%) - 1.672663397 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) + 6,935,275,139 cycles # 2.716 GHz + 20,130,100,658 instructions # 2.90 insn per cycle + 2.554710358 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274845946848876E-004 -Relative difference = 6.115670001294808e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.357311e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.367460e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.367460e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.707778 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.550561e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.557160e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.557160e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.068476 sec INFO: No Floating Point Exceptions have been reported - 2,463,882,722 cycles:u # 3.463 GHz (74.95%) - 628,681 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.26%) - 248,780,129 stalled-cycles-backend:u # 10.10% backend cycles idle (75.26%) - 6,979,514,050 instructions:u # 2.83 insn per cycle - # 0.04 stalled cycles per insn (75.26%) - 0.714446892 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) + 2,830,711,742 cycles # 2.640 GHz + 6,997,830,070 instructions # 2.47 insn per cycle + 1.072903816 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271947045332125E-004 -Relative difference = 4.4583988847766445e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.772339e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.780808e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.780808e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.936387 sec +INFO: No Floating Point Exceptions have been reported + 2,497,824,247 cycles # 2.658 GHz + 6,305,168,616 instructions # 2.52 insn per cycle + 0.940674173 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.362852e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.367803e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.367803e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.214531 sec +INFO: No Floating Point Exceptions have been reported + 2,054,265,568 cycles # 1.686 GHz + 3,276,400,100 instructions # 1.59 insn per cycle + 1.218830996 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index b462198372..1c3846a692 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:35:13 +DATE: 2024-05-16_15:13:15 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.481260e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.715421e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.716098e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.202247e-01 +- 3.251485e-01 ) GeV^-4 -TOTAL : 0.438030 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.362325e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.415082e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.420724e+05 ) sec^-1 +MeanMatrixElemValue 
= ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 +TOTAL : 0.471632 sec INFO: No Floating Point Exceptions have been reported - 1,220,333,101 cycles:u # 2.666 GHz (75.54%) - 2,304,041 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.67%) - 45,270,810 stalled-cycles-backend:u # 3.71% backend cycles idle (74.67%) - 1,585,479,906 instructions:u # 1.30 insn per cycle - # 0.03 stalled cycles per insn (74.91%) - 0.479059290 seconds time elapsed + 1,968,514,245 cycles # 2.841 GHz + 2,902,581,432 instructions # 1.47 insn per cycle + 0.750206216 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.684395e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.723876e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.724324e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.213664e+02 +- 1.195366e+02 ) GeV^-4 -TOTAL : 3.308698 sec 
+EvtsPerSec[Rmb+ME] (23) = ( 8.620229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.693332e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.696842e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 +TOTAL : 1.810763 sec INFO: No Floating Point Exceptions have been reported - 11,006,758,594 cycles:u # 3.302 GHz (74.97%) - 10,951,906 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.13%) - 1,139,355,084 stalled-cycles-backend:u # 10.35% backend cycles idle (75.08%) - 8,915,439,098 instructions:u # 0.81 insn per cycle - # 0.13 stalled cycles per insn (75.01%) - 3.359798684 seconds time elapsed + 5,856,364,996 cycles # 2.870 GHz + 12,360,478,892 instructions # 2.11 insn per cycle + 2.100167053 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.456233e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.457279e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.457279e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.687238 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.921942e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.922884e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.922884e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 8.542686 sec INFO: No Floating Point Exceptions have been reported - 23,293,795,292 cycles:u # 3.482 GHz (75.01%) - 1,274,168 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 3,029,191,292 stalled-cycles-backend:u # 13.00% backend cycles idle (75.01%) - 75,830,117,598 instructions:u # 3.26 insn per cycle - # 0.04 stalled cycles per insn (75.01%) - 6.696268134 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) + 24,622,493,732 cycles # 2.881 GHz + 78,127,963,456 instructions # 3.17 insn per cycle + 8.546707601 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866115424713E-004 -Relative difference = 5.861309557415831e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.892092e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.909443e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.909443e+03 ) sec^-1 
-MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.667991 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.925135e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.937766e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.937766e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 2.378023 sec INFO: No Floating Point Exceptions have been reported - 5,842,552,310 cycles:u # 3.497 GHz (74.86%) - 397,694 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.09%) - 807,766,317 stalled-cycles-backend:u # 13.83% backend cycles idle (75.10%) - 20,042,846,114 instructions:u # 3.43 insn per cycle - # 0.04 stalled cycles per insn (75.10%) - 1.675653045 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) + 6,481,719,151 cycles # 2.722 GHz + 20,120,720,773 instructions # 3.10 insn per cycle + 2.382079719 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274845946848876E-004 -Relative difference = 6.115670001294808e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.275768e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.285251e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.285251e+04 ) sec^-1 
-MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.730637 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.581338e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.587855e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.587855e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 1.046370 sec INFO: No Floating Point Exceptions have been reported - 2,459,552,259 cycles:u # 3.354 GHz (74.93%) - 740,121 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.92%) - 248,043,808 stalled-cycles-backend:u # 10.08% backend cycles idle (74.91%) - 6,975,163,769 instructions:u # 2.84 insn per cycle - # 0.04 stalled cycles per insn (74.91%) - 0.734545689 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) + 2,822,358,408 cycles # 2.688 GHz + 6,985,542,199 instructions # 2.48 insn per cycle + 1.050425346 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271947045332125E-004 -Relative difference = 4.4583988847766445e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.806198e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.814674e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.814674e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) 
GeV^-4 +TOTAL : 0.917862 sec +INFO: No Floating Point Exceptions have been reported + 2,496,110,223 cycles # 2.709 GHz + 6,293,657,033 instructions # 2.52 insn per cycle + 0.921934399 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.393764e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.398765e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.398765e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 1.186576 sec +INFO: No Floating Point Exceptions have been reported + 2,050,577,153 cycles # 1.723 GHz + 3,264,219,053 instructions # 1.59 insn per cycle + 1.190613213 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 582f69d8fa..dc12ca7aae 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,195 +1,236 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:33:03 +DATE: 2024-05-16_15:07:45 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.570878e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.733414e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.734046e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.202335e-01 +- 3.251521e-01 ) GeV^-4 -TOTAL : 0.453497 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.747793e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.405382e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.411341e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.473580 sec INFO: No Floating Point Exceptions have been reported - 1,268,447,146 cycles:u # 2.672 GHz (72.97%) - 2,815,308 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.71%) - 33,146,365 stalled-cycles-backend:u # 2.61% backend cycles idle (75.54%) - 1,688,684,066 instructions:u # 1.33 insn per cycle - # 0.02 stalled cycles per insn (74.71%) - 0.497506354 seconds time elapsed + 1,929,031,590 cycles # 2.811 GHz + 2,902,080,173 instructions # 1.50 insn per cycle + 0.744461149 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.286069e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.718768e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.719203e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.213799e+02 +- 1.195366e+02 ) GeV^-4 -TOTAL : 3.413932 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.464876e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.690964e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.694375e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 +TOTAL : 1.841417 sec INFO: No Floating Point Exceptions have been reported - 11,242,108,879 cycles:u # 3.266 GHz (75.00%) - 22,119,983 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.97%) - 1,144,437,843 stalled-cycles-backend:u # 10.18% backend cycles idle (74.91%) - 9,731,759,832 instructions:u # 0.87 insn per cycle - # 0.12 stalled cycles per insn (74.91%) - 3.461960927 seconds time elapsed + 5,892,322,421 cycles # 2.846 GHz + 12,206,550,799 instructions # 2.07 insn per cycle + 2.128532659 seconds time elapsed 
------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.387999e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.389049e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.389049e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.877878 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.911509e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.912427e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.912427e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.588000 sec INFO: No Floating Point Exceptions have been reported - 23,305,784,912 cycles:u # 3.387 GHz (75.01%) - 923,326 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.00%) - 3,093,870,225 stalled-cycles-backend:u # 13.28% backend cycles idle (75.00%) - 75,741,235,779 instructions:u # 3.25 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 6.882197021 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3898) (avx2: 0) (512y: 0) (512z: 0) + 24,603,486,303 cycles # 2.864 GHz + 78,128,844,221 instructions # 3.18 insn per cycle + 8.592028071 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866115424713E-004 -Relative difference = 5.861309557415831e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.795421e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.812524e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.812524e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.684375 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.897521e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.909886e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.909886e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.385730 sec INFO: No Floating Point Exceptions have been reported - 5,831,244,487 cycles:u # 3.457 GHz (74.88%) - 386,659 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.87%) - 816,144,338 stalled-cycles-backend:u # 14.00% backend cycles idle (74.87%) - 20,097,880,072 instructions:u # 3.45 insn per cycle - # 0.04 stalled cycles per insn (74.87%) - 1.689594333 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13237) (avx2: 0) (512y: 0) (512z: 0) + 6,477,077,766 cycles # 2.711 GHz + 20,121,628,941 instructions # 3.11 insn per cycle + 2.389950461 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274845946848876E-004 -Relative difference = 6.115670001294808e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.355939e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.365846e+04 
) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.365846e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.706033 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.564279e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.570570e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.570570e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.056604 sec INFO: No Floating Point Exceptions have been reported - 2,450,973,730 cycles:u # 3.457 GHz (74.81%) - 233,106 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.17%) - 246,000,590 stalled-cycles-backend:u # 10.04% backend cycles idle (75.17%) - 6,983,784,502 instructions:u # 2.85 insn per cycle - # 0.04 stalled cycles per insn (75.17%) - 0.710121922 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11604) (512y: 0) (512z: 0) + 2,817,814,854 cycles # 2.658 GHz + 6,988,003,654 instructions # 2.48 insn per cycle + 1.060745031 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271947045332125E-004 -Relative difference = 4.4583988847766445e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.769248e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.777272e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.777272e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) 
GeV^-4 +TOTAL : 0.935150 sec +INFO: No Floating Point Exceptions have been reported + 2,489,664,656 cycles # 2.652 GHz + 6,295,373,565 instructions # 2.53 insn per cycle + 0.939255376 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.359069e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.363945e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.363945e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.214973 sec +INFO: No Floating Point Exceptions have been reported + 2,049,104,437 cycles # 1.682 GHz + 3,266,431,248 instructions # 1.59 insn per cycle + 1.219018056 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2427) (512y: 46) (512z: 9578) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 825db12b95..fb9b3d5f50 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:00:29 +DATE: 2024-05-16_14:41:16 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.526350e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.732794e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.733472e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535870e-02 +- 4.279978e-02 ) GeV^-4 -TOTAL : 0.456858 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.355693e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.401036e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.406994e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 
+- 2.368053e+00 ) GeV^-4 +TOTAL : 0.484635 sec INFO: No Floating Point Exceptions have been reported - 1,172,997,975 cycles:u # 2.662 GHz (74.52%) - 2,102,170 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.94%) - 5,426,077 stalled-cycles-backend:u # 0.46% backend cycles idle (74.58%) - 1,608,142,364 instructions:u # 1.37 insn per cycle - # 0.00 stalled cycles per insn (74.86%) - 0.502460179 seconds time elapsed + 1,972,569,534 cycles # 2.816 GHz + 2,939,499,932 instructions # 1.49 insn per cycle + 0.757402101 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.703353e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.735893e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.736329e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 -TOTAL : 2.625423 sec +EvtsPerSec[Rmb+ME] (23) = ( 8.619113e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 8.679355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.682149e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.713739 sec INFO: No Floating Point Exceptions have been reported - 8,797,238,441 cycles:u # 3.342 GHz (74.92%) - 2,466,417 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.05%) - 5,893,940 stalled-cycles-backend:u # 0.07% backend cycles idle (75.08%) - 7,429,542,596 instructions:u # 0.84 insn per cycle - # 0.00 stalled cycles per insn (75.08%) - 2.674171484 seconds time elapsed + 5,540,767,327 cycles # 2.848 GHz + 11,699,037,597 instructions # 2.11 insn per cycle + 2.001424634 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.468765e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.469812e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.469812e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.653197 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.914726e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.915647e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.915647e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.573445 sec INFO: No Floating Point Exceptions have been reported - 23,282,870,099 cycles:u # 3.498 GHz (75.00%) - 939,267 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.00%) - 2,781,747,290 stalled-cycles-backend:u # 11.95% backend cycles idle (75.00%) - 75,716,409,389 instructions:u # 3.25 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 6.660070082 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3848) (avx2: 0) (512y: 0) (512z: 0) + 24,558,709,341 cycles # 2.863 GHz + 77,854,833,330 instructions # 3.17 insn per cycle + 8.577561930 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3114) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866108667618E-004 -Relative difference = 5.871505118544242e-08 +Avg ME (F77/C++) = 6.6274866268634797E-004 +Relative difference = 5.630135835748959e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.965818e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.983871e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
9.983871e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.655561 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.975982e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.989277e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.989277e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.359034 sec INFO: No Floating Point Exceptions have been reported - 5,795,882,258 cycles:u # 3.493 GHz (74.96%) - 386,455 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.93%) - 950,491,218 stalled-cycles-backend:u # 16.40% backend cycles idle (74.93%) - 20,037,204,067 instructions:u # 3.46 insn per cycle - # 0.05 stalled cycles per insn (74.93%) - 1.662161951 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13231) (avx2: 0) (512y: 0) (512z: 0) + 6,427,044,365 cycles # 2.721 GHz + 20,086,102,386 instructions # 3.13 insn per cycle + 2.363343503 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274845946848876E-004 -Relative difference = 6.115670001294808e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861465384638E-004 +Relative difference = 2.211071647257023e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.377610e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.387963e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.387963e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.699677 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.504468e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.510257e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.510257e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.097904 sec INFO: No Floating Point Exceptions have been reported - 2,444,743,906 cycles:u # 3.477 GHz (75.06%) - 624,230 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.97%) - 293,905,462 stalled-cycles-backend:u # 12.02% backend cycles idle (74.97%) - 6,976,866,477 instructions:u # 2.85 insn per cycle - # 0.04 stalled cycles per insn (74.97%) - 0.705849957 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11587) (512y: 0) (512z: 0) + 2,922,790,348 cycles # 2.654 GHz + 7,129,934,034 instructions # 2.44 insn per cycle + 1.101954791 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271947045332125E-004 -Relative difference = 4.4583988847766445e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271939668077068E-004 +Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.699271e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.706623e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.706623e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL 
: 0.973114 sec +INFO: No Floating Point Exceptions have been reported + 2,595,556,237 cycles # 2.658 GHz + 6,438,662,691 instructions # 2.48 insn per cycle + 0.977341866 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271939668077068E-004 +Relative difference = 5.008498817890231e-09 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.316379e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.321013e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.321013e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.254012 sec +INFO: No Floating Point Exceptions have been reported + 2,116,081,195 cycles # 1.683 GHz + 3,427,806,501 instructions # 1.62 insn per cycle + 1.258282002 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2924) (512y: 22) (512z: 9654) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952032322112E-004 +Relative difference = 3.066639970473621e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index c1ed143442..3f8f67a608 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:18:01 +DATE: 2024-05-16_14:55:36 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.920897e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.740666e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.741368e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535870e-02 +- 4.279978e-02 ) GeV^-4 -TOTAL : 1.282174 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.546626e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.586411e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.590653e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 
+- 2.368053e+00 ) GeV^-4 +TOTAL : 0.493388 sec INFO: No Floating Point Exceptions have been reported - 1,167,038,613 cycles:u # 2.658 GHz (74.00%) - 2,071,328 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.53%) - 5,431,611 stalled-cycles-backend:u # 0.47% backend cycles idle (74.88%) - 1,540,476,429 instructions:u # 1.32 insn per cycle - # 0.00 stalled cycles per insn (76.40%) - 1.324941068 seconds time elapsed + 2,052,942,224 cycles # 2.851 GHz + 3,071,897,705 instructions # 1.50 insn per cycle + 0.778091403 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.688286e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.717798e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.718232e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 -TOTAL : 2.613969 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.711232e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 7.769137e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.771784e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.862053 sec INFO: No Floating Point Exceptions have been reported - 8,765,727,418 cycles:u # 3.327 GHz (74.62%) - 2,501,390 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.81%) - 5,540,277 stalled-cycles-backend:u # 0.06% backend cycles idle (75.12%) - 7,358,326,767 instructions:u # 0.84 insn per cycle - # 0.00 stalled cycles per insn (75.25%) - 2.663247898 seconds time elapsed + 6,039,204,312 cycles # 2.872 GHz + 11,937,016,347 instructions # 1.98 insn per cycle + 2.158356809 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262660579844562E-004 +Relative difference = 2.836238137986709e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.124957e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.125610e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.125610e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204931e-01 +- 3.252405e-01 ) GeV^-4 -TOTAL : 26.786242 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.454006e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.454774e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.454774e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 30.077717 sec INFO: No Floating Point Exceptions have been reported - 93,343,664,723 cycles:u # 3.485 GHz (74.99%) - 553,820,580 stalled-cycles-frontend:u # 0.59% frontend cycles idle (75.01%) - 6,589,856,337 stalled-cycles-backend:u # 7.06% backend cycles idle (75.01%) - 133,679,871,960 instructions:u # 1.43 insn per cycle - # 0.05 stalled cycles per insn (75.01%) - 26.793077770 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:16437) (avx2: 0) (512y: 0) (512z: 0) + 86,228,096,895 cycles # 2.867 GHz + 135,581,749,205 instructions # 1.57 insn per cycle + 30.081848617 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15593) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627534e-04 -Avg ME (F77/C++) = 6.6275342244977858E-004 -Relative difference = 3.387350194093721e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627535e-04 +Avg ME (F77/C++) = 6.6275351196781740E-004 +Relative difference = 1.805772034719401e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 8.092589e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.104600e+03 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 8.104600e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 2.037363 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.767198e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.779100e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.779100e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.432356 sec INFO: No Floating Point Exceptions have been reported - 6,881,540,006 cycles:u # 3.372 GHz (74.96%) - 335,598 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.92%) - 3,042,174,457 stalled-cycles-backend:u # 44.21% backend cycles idle (74.92%) - 19,129,508,750 instructions:u # 2.78 insn per cycle - # 0.16 stalled cycles per insn (74.91%) - 2.044606153 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68898) (avx2: 0) (512y: 0) (512z: 0) + 6,776,462,064 cycles # 2.783 GHz + 19,386,992,522 instructions # 2.86 insn per cycle + 2.436630257 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69681) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274857053714997E-004 -Relative difference = 4.445554471174176e-08 +Avg ME (F77/C++) = 6.6274862707273868E-004 +Relative difference = 4.0849182767952624e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.452271e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.456048e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.456048e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 1.139550 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.415254e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.420302e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.420302e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 1.167177 sec INFO: No Floating Point Exceptions have been reported - 3,964,798,022 cycles:u # 3.468 GHz (74.83%) - 1,510,279 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.81%) - 2,247,951,197 stalled-cycles-backend:u # 56.70% backend cycles idle (74.81%) - 6,727,058,780 instructions:u # 1.70 insn per cycle - # 0.33 stalled cycles per insn (74.81%) - 1.147461385 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:48625) (512y: 0) (512z: 0) + 3,174,327,264 cycles # 2.711 GHz + 6,807,988,001 instructions # 2.14 insn per cycle + 1.171487938 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627274e-04 -Avg ME (F77/C++) = 6.6272735722101156E-004 -Relative difference = 6.454990161554483e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731558747466E-004 +Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.702865e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.710950e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.710950e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL 
: 0.971352 sec +INFO: No Floating Point Exceptions have been reported + 2,641,359,018 cycles # 2.709 GHz + 5,985,956,533 instructions # 2.27 insn per cycle + 0.975633569 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731558747466E-004 +Relative difference = 2.3520194007978538e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.382138e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.387143e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.387143e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.195051 sec +INFO: No Floating Point Exceptions have been reported + 2,079,765,601 cycles # 1.735 GHz + 3,501,460,071 instructions # 1.68 insn per cycle + 1.199295448 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5210) (512y: 3) (512z:44829) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627275e-04 +Avg ME (F77/C++) = 6.6272750363879224E-004 +Relative difference = 5.490631193034436e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 7c8363fc8c..f651d28060 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:18:45 +DATE: 2024-05-16_14:56:25 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.335496e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.736793e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.737597e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535870e-02 +- 4.279978e-02 ) GeV^-4 -TOTAL : 0.803562 sec +EvtsPerSec[Rmb+ME] (23) = ( 5.511551e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.549792e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.554590e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 
+- 2.368053e+00 ) GeV^-4 +TOTAL : 0.495451 sec INFO: No Floating Point Exceptions have been reported - 1,181,097,322 cycles:u # 2.657 GHz (74.42%) - 2,196,405 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.84%) - 5,335,814 stalled-cycles-backend:u # 0.45% backend cycles idle (74.97%) - 1,533,683,700 instructions:u # 1.30 insn per cycle - # 0.00 stalled cycles per insn (76.38%) - 0.850514793 seconds time elapsed + 2,081,587,427 cycles # 2.833 GHz + 3,058,350,902 instructions # 1.47 insn per cycle + 0.791962629 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.422474e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.737257e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.737702e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763201e+01 +- 6.205778e+01 ) GeV^-4 -TOTAL : 3.381105 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.609535e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 7.664835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.667390e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.882393 sec INFO: No Floating Point Exceptions have been reported - 8,707,695,461 cycles:u # 3.299 GHz (74.76%) - 2,469,193 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.93%) - 5,175,074 stalled-cycles-backend:u # 0.06% backend cycles idle (75.12%) - 7,338,896,918 instructions:u # 0.84 insn per cycle - # 0.00 stalled cycles per insn (75.19%) - 3.430759133 seconds time elapsed + 6,108,387,546 cycles # 2.880 GHz + 12,340,826,531 instructions # 2.02 insn per cycle + 2.177538628 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626791e-04 -Avg ME (F77/GPU) = 6.6270899361878938E-004 -Relative difference = 4.511024836808726e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262660579844562E-004 +Relative difference = 2.836238137986709e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.231441e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.232111e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.232111e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204931e-01 +- 3.252404e-01 ) GeV^-4 -TOTAL : 26.329235 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.501753e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.502516e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.502516e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 29.816879 sec INFO: No Floating Point Exceptions have been reported - 90,935,247,567 cycles:u # 3.454 GHz (75.00%) - 214,862,709 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.00%) - 6,938,555,145 stalled-cycles-backend:u # 7.63% backend cycles idle (75.00%) - 134,149,203,954 instructions:u # 1.48 insn per cycle - # 0.05 stalled cycles per insn (75.00%) - 26.335826468 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:16752) (avx2: 0) (512y: 0) (512z: 0) + 86,332,810,324 cycles # 2.895 GHz + 136,005,056,328 instructions # 1.58 insn per cycle + 29.820915946 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627534e-04 -Avg ME (F77/C++) = 6.6275342811702997E-004 -Relative difference = 4.242457295829522e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627535e-04 +Avg ME (F77/C++) = 6.6275348988418387E-004 +Relative difference = 1.5263316105958472e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 8.209079e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.221181e+03 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 8.221181e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 2.007981 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.665500e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.677672e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.677672e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.468426 sec INFO: No Floating Point Exceptions have been reported - 6,831,424,486 cycles:u # 3.396 GHz (74.97%) - 340,501 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.95%) - 3,243,827,993 stalled-cycles-backend:u # 47.48% backend cycles idle (74.95%) - 19,147,748,926 instructions:u # 2.80 insn per cycle - # 0.17 stalled cycles per insn (74.95%) - 2.014470522 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68882) (avx2: 0) (512y: 0) (512z: 0) + 6,845,942,231 cycles # 2.769 GHz + 19,438,050,467 instructions # 2.84 insn per cycle + 2.472841886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69723) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274857044990032E-004 -Relative difference = 4.4587192899226015e-08 +Avg ME (F77/C++) = 6.6274862764021530E-004 +Relative difference = 4.170542995014107e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.500161e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.504209e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.504209e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 1.103309 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.417376e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.422501e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.422501e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 1.165327 sec INFO: No Floating Point Exceptions have been reported - 3,841,380,021 cycles:u # 3.470 GHz (74.79%) - 743,882 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.71%) - 2,188,767,593 stalled-cycles-backend:u # 56.98% backend cycles idle (74.71%) - 6,678,743,261 instructions:u # 1.74 insn per cycle - # 0.33 stalled cycles per insn (74.91%) - 1.114464641 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47416) (512y: 0) (512z: 0) + 3,124,352,057 cycles # 2.673 GHz + 6,718,803,660 instructions # 2.15 insn per cycle + 1.169556736 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627274e-04 -Avg ME (F77/C++) = 6.6272735755491807E-004 -Relative difference = 6.404606472340801e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731651051409E-004 +Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.691458e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.698947e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.698947e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL 
: 0.977716 sec +INFO: No Floating Point Exceptions have been reported + 2,637,169,918 cycles # 2.688 GHz + 5,969,286,098 instructions # 2.26 insn per cycle + 0.981942660 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731651051409E-004 +Relative difference = 2.4912983202981302e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.359514e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.364207e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.364207e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.214640 sec +INFO: No Floating Point Exceptions have been reported + 2,077,190,375 cycles # 1.705 GHz + 3,494,266,618 instructions # 1.68 insn per cycle + 1.219012886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4174) (512y: 4) (512z:44472) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627275e-04 +Avg ME (F77/C++) = 6.6272750384530066E-004 +Relative difference = 5.80223501432476e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 9d0d7beb2c..19b36f52e3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:00:47 +DATE: 2024-05-16_14:41:41 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.267158e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.440515e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.441290e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.640624 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.461685e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.485212e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.487571e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 
+- 2.368970e+00 ) GeV^-4 +TOTAL : 0.529512 sec INFO: No Floating Point Exceptions have been reported - 1,970,993,277 cycles:u # 2.996 GHz (73.71%) - 2,234,518 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.07%) - 5,770,515 stalled-cycles-backend:u # 0.29% backend cycles idle (75.40%) - 2,169,407,546 instructions:u # 1.10 insn per cycle - # 0.00 stalled cycles per insn (75.69%) - 0.686561211 seconds time elapsed + 2,178,837,447 cycles # 2.822 GHz + 3,364,663,947 instructions # 1.54 insn per cycle + 0.830788537 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.241178e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.246579e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.246636e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 7.701668 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.126264e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.153475e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154649e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.029127 sec INFO: No Floating Point Exceptions have been reported - 26,494,625,312 cycles:u # 3.436 GHz (75.00%) - 3,015,768 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.03%) - 5,017,372 stalled-cycles-backend:u # 0.02% backend cycles idle (74.96%) - 21,118,215,281 instructions:u # 0.80 insn per cycle - # 0.00 stalled cycles per insn (74.96%) - 7.755036739 seconds time elapsed + 9,370,132,482 cycles # 2.854 GHz + 19,961,685,193 instructions # 2.13 insn per cycle + 3.339329204 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.184089e+03 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.184941e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.184941e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.520886 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.814801e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.815638e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.815638e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 9.046315 sec INFO: No Floating Point Exceptions have been reported - 26,339,863,085 cycles:u # 3.501 GHz (75.02%) - 27,556,319 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.02%) - 3,725,587,826 stalled-cycles-backend:u # 14.14% backend cycles idle (75.02%) - 82,342,494,882 instructions:u # 3.13 insn per cycle - # 0.05 stalled cycles per insn (75.02%) - 7.528178258 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 6623) (avx2: 0) (512y: 0) (512z: 0) + 25,898,843,827 cycles # 2.862 GHz + 79,438,691,532 instructions # 3.07 insn per cycle + 9.050548799 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4858) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.104365e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.109013e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.109013e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
3.224895 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.427576e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.430583e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.430583e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.794109 sec INFO: No Floating Point Exceptions have been reported - 11,294,876,919 cycles:u # 3.499 GHz (75.01%) - 3,584,548 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.97%) - 1,420,624,378 stalled-cycles-backend:u # 12.58% backend cycles idle (74.97%) - 38,448,878,910 instructions:u # 3.40 insn per cycle - # 0.04 stalled cycles per insn (74.97%) - 3.231777532 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12755) (avx2: 0) (512y: 0) (512z: 0) + 12,707,110,349 cycles # 2.649 GHz + 38,549,995,901 instructions # 3.03 insn per cycle + 4.798432518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13163) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.221185e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.223860e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.223860e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.354741 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.947160e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.962973e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.962973e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.073357 sec INFO: No Floating Point Exceptions have been reported - 4,740,411,287 cycles:u # 3.490 GHz (74.68%) - 293,447 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.83%) - 408,157,629 stalled-cycles-backend:u # 8.61% backend cycles idle (75.13%) - 13,512,450,760 instructions:u # 2.85 insn per cycle - # 0.03 stalled cycles per insn (75.27%) - 1.362546494 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10944) (512y: 0) (512z: 0) + 5,517,673,615 cycles # 2.658 GHz + 13,479,814,632 instructions # 2.44 insn per cycle + 2.077628129 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276836E-004 -Relative difference = 2.9563428359824236e-07 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.092853e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.113453e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.113453e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.813039 sec +INFO: No Floating 
Point Exceptions have been reported + 4,828,852,439 cycles # 2.658 GHz + 12,135,084,334 instructions # 2.51 insn per cycle + 1.817332368 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.671146e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.682190e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.682190e+03 ) sec^-1 +MeanMatrixElemValue 
= ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.468744 sec +INFO: No Floating Point Exceptions have been reported + 4,141,507,976 cycles # 1.676 GHz + 6,337,241,929 instructions # 1.53 insn per cycle + 2.472886901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1803) (512y: 93) (512z: 9358) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index c299a88ec3..10c707e81e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-05-16_16:01:14 +DATE: 2024-05-16_14:42:15 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.233929e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.442774e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.443041e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.549444 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.482619e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.506518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508744e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 
+- 2.368970e+00 ) GeV^-4 +TOTAL : 0.530792 sec INFO: No Floating Point Exceptions have been reported - 1,487,157,955 cycles:u # 2.746 GHz (75.03%) - 2,201,231 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.76%) - 5,308,721 stalled-cycles-backend:u # 0.36% backend cycles idle (74.67%) - 1,826,761,476 instructions:u # 1.23 insn per cycle - # 0.00 stalled cycles per insn (74.94%) - 0.610510696 seconds time elapsed + 2,175,803,522 cycles # 2.817 GHz + 3,378,965,043 instructions # 1.55 insn per cycle + 0.832396723 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.732620e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.741273e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.741383e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.357089 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.148343e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.175835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.177034e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.021680 sec INFO: No Floating Point Exceptions have been reported - 21,757,949,392 cycles:u # 3.414 GHz (75.05%) - 2,871,375 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.11%) - 5,545,383 stalled-cycles-backend:u # 0.03% backend cycles idle (75.08%) - 17,476,889,330 instructions:u # 0.80 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 6.410991416 seconds time elapsed + 9,373,127,330 cycles # 2.855 GHz + 21,008,547,067 instructions # 2.24 insn per cycle + 3.339100414 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.214949e+03 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.215824e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.215824e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 7.416114 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.816314e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.817146e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.817146e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 9.038908 sec INFO: No Floating Point Exceptions have been reported - 25,997,090,548 cycles:u # 3.504 GHz (75.00%) - 2,505,020 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 3,347,857,668 stalled-cycles-backend:u # 12.88% backend cycles idle (74.99%) - 82,247,523,819 instructions:u # 3.16 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 7.423065008 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 6491) (avx2: 0) (512y: 0) (512z: 0) + 25,883,565,851 cycles # 2.863 GHz + 79,454,182,113 instructions # 3.07 insn per cycle + 9.043041112 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4505) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.110095e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.114739e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.114739e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
3.221131 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.444359e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.447378e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.447378e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.770841 sec INFO: No Floating Point Exceptions have been reported - 11,269,898,163 cycles:u # 3.495 GHz (74.95%) - 4,697,638 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.95%) - 1,304,176,959 stalled-cycles-backend:u # 11.57% backend cycles idle (74.95%) - 38,519,332,782 instructions:u # 3.42 insn per cycle - # 0.03 stalled cycles per insn (74.95%) - 3.228176990 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12729) (avx2: 0) (512y: 0) (512z: 0) + 12,673,930,469 cycles # 2.656 GHz + 38,521,208,960 instructions # 3.04 insn per cycle + 4.775036357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12930) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.227037e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.229728e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.229728e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.348175 
sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.869925e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.885399e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.885399e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.093382 sec INFO: No Floating Point Exceptions have been reported - 4,737,521,447 cycles:u # 3.504 GHz (74.62%) - 291,147 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.87%) - 484,828,585 stalled-cycles-backend:u # 10.23% backend cycles idle (75.15%) - 13,528,004,531 instructions:u # 2.86 insn per cycle - # 0.04 stalled cycles per insn (75.15%) - 1.354978019 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10926) (512y: 0) (512z: 0) + 5,571,627,209 cycles # 2.657 GHz + 13,607,217,607 instructions # 2.44 insn per cycle + 2.097652206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276836E-004 -Relative difference = 2.9563428359824236e-07 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.920636e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.941759e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.941759e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.847928 sec +INFO: No Floating 
Point Exceptions have been reported + 4,911,801,030 cycles # 2.653 GHz + 12,271,296,407 instructions # 2.50 insn per cycle + 1.852091714 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.658898e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.670376e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.670376e+03 ) sec^-1 
+MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.472418 sec +INFO: No Floating Point Exceptions have been reported + 4,148,038,447 cycles # 1.675 GHz + 6,442,551,576 instructions # 1.55 insn per cycle + 2.476725391 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1628) (512y: 191) (512z: 9356) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 3d0d3b15bb..5b5bd116a3 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_16:02:08 +DATE: 2024-05-16_14:44:06 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.861034e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.868579e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.868634e+01 ) sec^-1 -MeanMatrixElemValue = ( 3.297256e-04 +- 2.011325e-04 ) GeV^-6 -TOTAL : 9.607645 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.065678e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.066059e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.066269e+02 ) sec^-1 +MeanMatrixElemValue = ( 
1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.433623 sec INFO: No Floating Point Exceptions have been reported - 29,231,885,707 cycles:u # 3.037 GHz (74.94%) - 3,405,584 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.93%) - 10,029,785 stalled-cycles-backend:u # 0.03% backend cycles idle (74.99%) - 23,193,995,344 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (74.99%) - 9.660944397 seconds time elapsed + 7,848,341,490 cycles # 2.847 GHz + 17,462,165,188 instructions # 2.22 insn per cycle + 2.813787845 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.508374e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.521642e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.521668e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 -TOTAL : 9.126743 sec +EvtsPerSec[Rmb+ME] (23) = ( 
9.279047e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.280919e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.281197e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.985795 sec INFO: No Floating Point Exceptions have been reported - 27,595,818,037 cycles:u # 3.033 GHz (74.94%) - 3,263,343 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 9,195,348 stalled-cycles-backend:u # 0.03% backend cycles idle (75.06%) - 21,903,563,579 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 9.180327173 seconds time elapsed + 12,317,841,072 cycles # 2.855 GHz + 29,065,647,551 instructions # 2.36 insn per cycle + 4.369250222 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406710E-003 -Relative difference = 3.516477760164775e-07 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.029752e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.029779e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.029779e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.133518 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.769533e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.769744e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.769744e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.808252 sec INFO: No Floating Point Exceptions have been reported - 17,970,736,677 cycles:u # 3.498 GHz (74.93%) - 25,445,093 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.93%) - 2,114,620,206 stalled-cycles-backend:u # 11.77% backend cycles idle (74.96%) - 55,145,918,794 instructions:u # 3.07 insn per cycle - # 0.04 stalled cycles per insn (75.04%) - 5.143293014 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44874) (avx2: 0) (512y: 0) (512z: 0) + 18,798,528,369 cycles # 2.760 GHz + 53,916,630,138 instructions # 2.87 insn per cycle + 6.812355714 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.236434e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.236573e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.236573e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 2.366565 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.539980e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.540062e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.540062e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.433152 sec INFO: No Floating Point Exceptions have been reported - 8,276,572,707 cycles:u # 3.492 GHz (75.02%) - 1,944,212 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) - 776,775,939 stalled-cycles-backend:u # 9.39% backend cycles idle (75.02%) - 26,993,949,937 instructions:u # 3.26 insn per cycle - # 0.03 stalled cycles per insn (75.02%) - 2.373131093 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97234) (avx2: 0) (512y: 0) (512z: 0) + 9,799,231,624 cycles # 2.852 GHz + 27,092,581,938 instructions # 2.76 insn per cycle + 3.437235180 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.196553e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.197233e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.197233e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 1.021705 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.326889e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.327284e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.327284e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.592072 sec INFO: No Floating Point Exceptions have been reported - 3,566,060,493 cycles:u # 3.479 GHz (75.04%) - 1,630,557 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.03%) - 272,703,197 stalled-cycles-backend:u # 7.65% backend cycles idle (75.03%) - 9,519,525,811 instructions:u # 2.67 insn per cycle - # 0.03 stalled cycles per insn (75.03%) - 1.028233480 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84279) (512y: 0) (512z: 0) + 4,220,179,984 cycles # 2.645 GHz + 9,560,887,701 instructions # 2.27 insn per cycle + 1.596045022 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.770010e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.770613e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.770613e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.405649 sec +INFO: No Floating Point Exceptions have been reported + 3,726,923,548 cycles # 2.645 GHz + 
8,484,897,516 instructions # 2.28 insn per cycle + 1.409716339 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.281739e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.282258e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.282258e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.614138 
sec +INFO: No Floating Point Exceptions have been reported + 2,690,163,143 cycles # 1.663 GHz + 4,272,866,756 instructions # 1.59 insn per cycle + 1.618172762 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 98059781c6..1be1cfeedf 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 
1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_16:29:06 +DATE: 2024-05-16_15:03:52 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.810560e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.811281e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.811281e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 9.602411 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.063403e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.064386e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.064386e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.395260 sec INFO: No Floating Point Exceptions have been reported - 33,112,297,027 cycles:u # 3.443 GHz (75.03%) - 3,422,513 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.05%) - 10,504,866 stalled-cycles-backend:u # 0.03% backend cycles idle (75.04%) - 26,153,799,203 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 9.655106468 seconds time elapsed + 7,755,019,436 cycles # 2.852 GHz + 17,230,726,903 instructions # 2.22 insn per cycle + 2.775067655 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.527353e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.530982e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.530982e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.221264e+00 +- 1.219329e+00 ) GeV^-6 -TOTAL : 9.065470 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.263412e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.297943e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.297943e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.971807 sec INFO: No Floating Point Exceptions have been reported - 31,120,573,577 cycles:u # 3.431 GHz (74.97%) - 4,565,890 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) - 60,864,496 stalled-cycles-backend:u # 0.20% backend cycles idle (74.97%) - 24,601,274,090 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 9.113362546 seconds time elapsed + 12,283,103,403 cycles # 2.855 GHz + 27,758,308,143 instructions # 2.26 insn per cycle + 4.357937638 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406710E-003 -Relative difference = 3.516477760164775e-07 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.028478e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.028506e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.028506e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.140219 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.361041e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.361236e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.361236e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.177191 sec INFO: No Floating Point Exceptions have been reported - 17,984,375,920 cycles:u # 3.497 GHz (74.96%) - 24,790,369 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.96%) - 2,154,971,354 stalled-cycles-backend:u # 11.98% backend cycles idle (74.96%) - 55,207,882,428 instructions:u # 3.07 insn per cycle - # 0.04 stalled cycles per insn (74.98%) - 5.149502955 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44874) (avx2: 0) (512y: 0) (512z: 0) + 18,941,402,140 cycles # 2.638 GHz + 53,918,413,850 instructions # 2.85 insn per cycle + 7.181202320 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32448) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.213003e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.213144e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.213144e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.393300 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.538220e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.538308e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.538308e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.437404 sec INFO: No Floating Point Exceptions have been reported - 8,244,856,086 cycles:u # 3.440 GHz (74.98%) - 1,395,959 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.97%) - 824,554,021 stalled-cycles-backend:u # 10.00% backend cycles idle (74.97%) - 26,990,902,568 instructions:u # 3.27 insn per cycle - # 0.03 stalled cycles per insn (74.97%) - 2.400361461 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97234) (avx2: 0) (512y: 0) (512z: 0) + 9,826,263,323 cycles # 2.856 GHz + 27,093,421,705 instructions # 2.76 insn per cycle + 3.441586183 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96443) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.165671e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.166363e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.166363e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.027888 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.320877e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.321321e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.321321e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.595267 sec INFO: No Floating Point Exceptions have been reported - 3,583,881,539 cycles:u # 3.475 GHz (74.81%) - 1,492,642 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.17%) - 342,634,299 stalled-cycles-backend:u # 9.56% backend cycles idle (75.18%) - 9,515,596,355 instructions:u # 2.66 insn per cycle - # 0.04 stalled cycles per insn (75.18%) - 1.034777693 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84279) (512y: 0) (512z: 0) + 4,226,325,559 cycles # 2.644 GHz + 9,562,000,988 instructions # 2.26 insn per cycle + 1.599357751 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.768698e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.769266e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.769266e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.406299 sec +INFO: No Floating Point Exceptions have been reported + 3,728,202,948 cycles # 2.645 GHz + 8,485,828,873 instructions # 2.28 insn per cycle + 1.410433353 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.279021e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.279511e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.279511e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.615850 sec +INFO: No Floating Point Exceptions have been reported + 2,693,497,833 cycles # 1.663 GHz + 4,273,840,765 instructions # 1.59 insn per cycle + 1.620067219 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 03f1c7030a..3f519fda03 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_16:03:26 +DATE: 2024-05-16_14:45:01 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.950174e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.955143e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.955187e+01 ) sec^-1 -MeanMatrixElemValue = ( 3.297256e-04 +- 2.011325e-04 ) GeV^-6 -TOTAL : 9.516879 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.065431e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.065819e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.066015e+02 ) sec^-1 +MeanMatrixElemValue = ( 
1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.431749 sec INFO: No Floating Point Exceptions have been reported - 32,591,472,838 cycles:u # 3.417 GHz (75.01%) - 3,390,992 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.04%) - 8,753,687 stalled-cycles-backend:u # 0.03% backend cycles idle (75.02%) - 25,746,238,931 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 9.568277752 seconds time elapsed + 7,858,443,167 cycles # 2.852 GHz + 17,797,449,482 instructions # 2.26 insn per cycle + 2.810886675 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.511919e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.526871e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.526891e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 -TOTAL : 9.059256 sec +EvtsPerSec[Rmb+ME] (23) = ( 
9.189050e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.190883e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.191132e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.007219 sec INFO: No Floating Point Exceptions have been reported - 30,902,483,238 cycles:u # 3.416 GHz (74.99%) - 3,318,629 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.91%) - 6,492,525 stalled-cycles-backend:u # 0.02% backend cycles idle (74.92%) - 24,471,874,069 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 9.106634823 seconds time elapsed + 12,388,147,716 cycles # 2.856 GHz + 29,572,084,158 instructions # 2.39 insn per cycle + 4.393669645 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406710E-003 -Relative difference = 3.516477760164775e-07 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.022350e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.022377e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.022377e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.169976 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.555801e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.556015e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.556015e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.995892 sec INFO: No Floating Point Exceptions have been reported - 18,013,433,306 cycles:u # 3.482 GHz (74.95%) - 25,779,908 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.95%) - 2,184,560,790 stalled-cycles-backend:u # 12.13% backend cycles idle (74.95%) - 55,161,919,505 instructions:u # 3.06 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 5.199827912 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44747) (avx2: 0) (512y: 0) (512z: 0) + 18,868,341,179 cycles # 2.696 GHz + 53,930,114,085 instructions # 2.86 insn per cycle + 6.999840535 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32063) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.224891e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.225027e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.225027e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 2.378492 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.547899e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.547983e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.547983e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.415669 sec INFO: No Floating Point Exceptions have been reported - 8,291,005,243 cycles:u # 3.481 GHz (74.81%) - 1,344,765 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.87%) - 814,344,181 stalled-cycles-backend:u # 9.82% backend cycles idle (75.04%) - 27,026,510,570 instructions:u # 3.26 insn per cycle - # 0.03 stalled cycles per insn (75.15%) - 2.385036711 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97230) (avx2: 0) (512y: 0) (512z: 0) + 9,762,163,827 cycles # 2.856 GHz + 27,089,755,364 instructions # 2.77 insn per cycle + 3.419663266 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96286) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.117019e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.117685e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.117685e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 1.036871 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.328520e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.328923e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328923e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.591769 sec INFO: No Floating Point Exceptions have been reported - 3,600,168,919 cycles:u # 3.461 GHz (74.69%) - 1,438,828 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.62%) - 295,250,423 stalled-cycles-backend:u # 8.20% backend cycles idle (74.65%) - 9,556,356,421 instructions:u # 2.65 insn per cycle - # 0.03 stalled cycles per insn (75.03%) - 1.045363830 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84249) (512y: 0) (512z: 0) + 4,217,350,816 cycles # 2.647 GHz + 9,560,856,496 instructions # 2.27 insn per cycle + 1.595749154 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.765235e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765802e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765802e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.407302 sec +INFO: No Floating Point Exceptions have been reported + 3,737,969,275 cycles # 2.650 GHz + 
8,484,674,655 instructions # 2.27 insn per cycle + 1.411406372 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.273845e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.274322e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.274322e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.617639 
sec +INFO: No Floating Point Exceptions have been reported + 2,695,774,477 cycles # 1.663 GHz + 4,276,120,388 instructions # 1.59 insn per cycle + 1.621698890 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index ef65546ab4..8097294660 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_16:04:41 +DATE: 2024-05-16_14:45:55 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.780984e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.784144e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.784162e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.256794e-06 +- 4.775721e-07 ) GeV^-6 -TOTAL : 4.541182 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.560287e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.561087e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.561509e+02 ) sec^-1 +MeanMatrixElemValue = ( 
1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.688313 sec INFO: No Floating Point Exceptions have been reported - 15,513,219,865 cycles:u # 3.400 GHz (74.94%) - 2,866,959 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.92%) - 6,407,246 stalled-cycles-backend:u # 0.04% backend cycles idle (75.01%) - 12,595,649,559 instructions:u # 0.81 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 4.592083956 seconds time elapsed + 5,610,919,333 cycles # 2.843 GHz + 12,076,970,192 instructions # 2.15 insn per cycle + 2.032164963 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.212933e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.234752e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.234835e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.930014e-02 +- 1.363569e-02 ) GeV^-6 -TOTAL : 4.726997 sec +EvtsPerSec[Rmb+ME] (23) = ( 
2.335524e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.336187e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.336332e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 +TOTAL : 1.920912 sec INFO: No Floating Point Exceptions have been reported - 16,074,409,092 cycles:u # 3.393 GHz (75.01%) - 2,704,019 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) - 6,676,651 stalled-cycles-backend:u # 0.04% backend cycles idle (75.02%) - 13,068,602,962 instructions:u # 0.81 insn per cycle - # 0.00 stalled cycles per insn (75.06%) - 4.769822783 seconds time elapsed + 6,262,064,127 cycles # 2.846 GHz + 13,866,454,713 instructions # 2.21 insn per cycle + 2.256561773 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 9.855155e-03 -Avg ME (F77/GPU) = 9.8696023209835834E-003 -Relative difference = 0.0014659658811639687 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849636e-03 +Avg ME (F77/GPU) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.092658e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092688e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092688e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 4.836929 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.473644e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.473896e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.473896e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.236471 sec INFO: No Floating Point Exceptions have been reported - 16,829,785,934 cycles:u # 3.477 GHz (74.94%) - 17,294,263 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.02%) - 1,652,705,918 stalled-cycles-backend:u # 9.82% backend cycles idle (75.04%) - 51,748,648,616 instructions:u # 3.07 insn per cycle - # 0.03 stalled cycles per insn (75.04%) - 4.843459172 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:27812) (avx2: 0) (512y: 0) (512z: 0) + 17,834,532,335 cycles # 2.858 GHz + 53,589,179,622 instructions # 3.00 insn per cycle + 6.240522901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087414119E-003 -Relative difference = 2.1196409216982896e-08 +Avg ME (F77/C++) = 9.8479612087541066E-003 +Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.600756e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.601329e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 4.601329e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924322e-03 +- 4.918776e-03 ) GeV^-6 -TOTAL : 1.153223 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.311314e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.311704e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.311704e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.599839 sec INFO: No Floating Point Exceptions have been reported - 4,038,703,945 cycles:u # 3.491 GHz (74.79%) - 643,845 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.11%) - 407,380,888 stalled-cycles-backend:u # 10.09% backend cycles idle (75.11%) - 13,691,102,245 instructions:u # 3.39 insn per cycle - # 0.03 stalled cycles per insn (75.11%) - 1.169033845 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97762) (avx2: 0) (512y: 0) (512z: 0) + 4,578,829,094 cycles # 2.856 GHz + 13,761,810,246 instructions # 3.01 insn per cycle + 1.603811766 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847957e-03 -Avg ME (F77/C++) = 9.8479574833965355E-003 -Relative difference = 4.9085971470122835e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896527003E-003 +Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.018113e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.018365e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.018365e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.524433 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.636666e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.638274e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.638274e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.800488 sec INFO: No Floating Point Exceptions have been reported - 1,791,692,598 cycles:u # 3.395 GHz (74.36%) - 910,702 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.23%) - 168,345,930 stalled-cycles-backend:u # 9.40% backend cycles idle (74.39%) - 4,819,680,819 instructions:u # 2.69 insn per cycle - # 0.03 stalled cycles per insn (75.16%) - 0.531951240 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84831) (512y: 0) (512z: 0) + 2,129,570,848 cycles # 2.649 GHz + 4,816,093,977 instructions # 2.26 insn per cycle + 0.804523713 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161012351E-003 -Relative difference = 1.8588827066662492e-08 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.627354e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.629498e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.629498e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.697299 sec +INFO: 
No Floating Point Exceptions have been reported + 1,857,131,979 cycles # 2.651 GHz + 4,273,320,598 instructions # 2.30 insn per cycle + 0.701213399 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.540089e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.542023e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.542023e+02 ) 
sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.812936 sec +INFO: No Floating Point Exceptions have been reported + 1,360,618,833 cycles # 1.668 GHz + 2,159,125,772 instructions # 1.59 insn per cycle + 0.816997353 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982958280E-003 +Relative difference = 2.0044092642523172e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index af9847b5bc..6d352d97ac 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,204 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_16:30:22 +DATE: 2024-05-16_15:04:47 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.745000e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.745330e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.745330e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.935145e-03 +- 4.929588e-03 ) GeV^-6 -TOTAL : 4.622996 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.582684e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.584567e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.584567e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 +TOTAL : 1.633735 sec INFO: No Floating Point Exceptions have been reported - 15,867,233,439 cycles:u # 3.416 GHz (74.96%) - 2,681,610 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.03%) - 7,639,743 stalled-cycles-backend:u # 0.05% backend cycles idle (75.04%) - 12,859,486,940 instructions:u # 0.81 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 4.666755965 seconds time elapsed + 5,453,177,396 cycles # 2.847 GHz + 11,627,188,509 instructions # 2.13 insn per cycle + 1.971399165 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.260170e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.276763e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.276763e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.258769e+00 +- 1.256832e+00 ) GeV^-6 -TOTAL : 4.745808 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.292389e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.306150e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.306150e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 +TOTAL : 1.932378 sec INFO: No Floating Point Exceptions have been reported - 16,204,974,490 cycles:u # 3.403 GHz (75.02%) - 3,613,716 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.00%) - 47,404,694 stalled-cycles-backend:u # 0.29% backend cycles idle (74.97%) - 13,139,458,360 instructions:u # 0.81 insn per cycle - # 0.00 stalled cycles per insn (74.91%) - 4.790366579 seconds time elapsed + 6,313,737,946 cycles # 2.848 GHz + 13,568,150,990 instructions # 2.15 insn per cycle + 2.274068662 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 9.855155e-03 -Avg ME (F77/GPU) = 9.8696023209835834E-003 -Relative difference = 0.0014659658811639687 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849636e-03 +Avg ME (F77/GPU) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.096616e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.096647e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.096647e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 4.819941 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.447529e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.447785e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.447785e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.255519 sec INFO: No Floating Point Exceptions have been reported - 16,787,534,744 cycles:u # 3.481 GHz (74.97%) - 15,818,656 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.95%) - 1,742,758,069 stalled-cycles-backend:u # 10.38% backend cycles idle (74.95%) - 51,728,330,987 instructions:u # 3.08 insn per cycle - # 0.03 stalled cycles per insn (74.95%) - 4.826082829 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:27812) (avx2: 0) (512y: 0) (512z: 0) + 17,871,844,477 cycles # 2.856 GHz + 53,590,423,890 instructions # 3.00 insn per cycle + 6.259496797 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20208) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087414119E-003 -Relative difference = 2.1196409216982896e-08 +Avg ME (F77/C++) = 9.8479612087541066E-003 +Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.606640e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.607172e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.607172e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924322e-03 +- 4.918776e-03 ) GeV^-6 -TOTAL : 1.152001 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.319002e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.319403e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.319403e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.595862 sec INFO: No Floating Point Exceptions have been reported - 4,025,017,670 cycles:u # 3.483 GHz (75.00%) - 659,426 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.08%) - 376,740,682 stalled-cycles-backend:u # 9.36% backend cycles idle (75.08%) - 13,709,516,953 instructions:u # 3.41 insn per cycle - # 0.03 stalled cycles per insn (75.08%) - 1.158459303 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97762) (avx2: 0) (512y: 0) (512z: 0) + 4,573,738,949 cycles # 2.860 GHz + 13,762,785,828 instructions # 3.01 insn per cycle + 1.599904345 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847957e-03 -Avg ME (F77/C++) = 9.8479574833965355E-003 -Relative difference = 4.9085971470122835e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896527003E-003 +Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.974105e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.976536e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.976536e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.534779 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.613525e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.615218e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.615218e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.804350 sec INFO: No Floating Point Exceptions have been reported - 1,791,388,173 cycles:u # 3.329 GHz (74.77%) - 680,193 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.73%) - 160,894,614 stalled-cycles-backend:u # 8.98% backend cycles idle (74.73%) - 4,799,712,443 instructions:u # 2.68 insn per cycle - # 0.03 stalled cycles per insn (74.73%) - 0.541554035 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84831) (512y: 0) (512z: 0) + 2,139,167,872 cycles # 2.648 GHz + 4,817,111,626 instructions # 2.25 insn per cycle + 0.808508083 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161012351E-003 -Relative difference = 1.8588827066662492e-08 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.603124e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.605216e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.605216e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.699524 sec +INFO: No Floating Point Exceptions have been reported + 1,862,402,974 cycles # 2.650 GHz + 4,274,167,467 instructions # 2.29 insn per cycle + 0.703628099 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.580110e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.582197e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.582197e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.807828 sec +INFO: No Floating Point Exceptions have been reported + 1,354,037,726 cycles # 1.669 GHz + 2,159,114,420 instructions # 1.59 insn per cycle + 0.811949308 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2890) (512y: 49) (512z:79305) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982958280E-003 +Relative difference = 2.0044092642523172e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 29dcec006d..2d3f5a3740 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_16:05:29 +DATE: 2024-05-16_14:46:34 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.794480e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.797565e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.797585e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.256794e-06 +- 4.775721e-07 ) GeV^-6 -TOTAL : 4.575830 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.539024e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.539847e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.540280e+02 ) sec^-1 +MeanMatrixElemValue = ( 
1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.689103 sec INFO: No Floating Point Exceptions have been reported - 15,575,228,231 cycles:u # 3.399 GHz (75.04%) - 2,805,607 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.03%) - 5,139,054 stalled-cycles-backend:u # 0.03% backend cycles idle (74.91%) - 12,697,850,039 instructions:u # 0.82 insn per cycle - # 0.00 stalled cycles per insn (74.93%) - 4.620142781 seconds time elapsed + 5,627,474,622 cycles # 2.847 GHz + 11,923,534,222 instructions # 2.12 insn per cycle + 2.035228412 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.257559e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.282938e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.283009e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.930014e-02 +- 1.363569e-02 ) GeV^-6 -TOTAL : 4.706697 sec +EvtsPerSec[Rmb+ME] (23) = ( 
2.303686e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.304329e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.304469e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 +TOTAL : 1.932451 sec INFO: No Floating Point Exceptions have been reported - 15,931,475,462 cycles:u # 3.386 GHz (75.01%) - 2,790,044 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.03%) - 6,488,872 stalled-cycles-backend:u # 0.04% backend cycles idle (74.98%) - 12,976,776,317 instructions:u # 0.81 insn per cycle - # 0.00 stalled cycles per insn (74.92%) - 4.753084936 seconds time elapsed + 6,311,455,519 cycles # 2.848 GHz + 13,762,708,375 instructions # 2.18 insn per cycle + 2.272906437 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 9.855155e-03 -Avg ME (F77/GPU) = 9.8696023209835834E-003 -Relative difference = 0.0014659658811639687 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849636e-03 +Avg ME (F77/GPU) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.068990e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.069020e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.069020e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 4.949661 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.477402e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.477656e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.477656e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.233047 sec INFO: No Floating Point Exceptions have been reported - 16,872,539,803 cycles:u # 3.406 GHz (74.97%) - 19,348,489 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.97%) - 1,719,375,862 stalled-cycles-backend:u # 10.19% backend cycles idle (74.97%) - 51,753,051,978 instructions:u # 3.07 insn per cycle - # 0.03 stalled cycles per insn (74.97%) - 4.957569698 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:27678) (avx2: 0) (512y: 0) (512z: 0) + 17,803,580,317 cycles # 2.855 GHz + 53,580,069,164 instructions # 3.01 insn per cycle + 6.237030677 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087396841E-003 -Relative difference = 2.119623377106246e-08 +Avg ME (F77/C++) = 9.8479612087582491E-003 +Relative difference = 2.1198118933954545e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.251516e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.251999e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 4.251999e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924322e-03 +- 4.918776e-03 ) GeV^-6 -TOTAL : 1.246488 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.307415e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.307805e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.307805e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.601039 sec INFO: No Floating Point Exceptions have been reported - 4,011,784,105 cycles:u # 3.208 GHz (75.07%) - 772,500 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.05%) - 398,973,911 stalled-cycles-backend:u # 9.95% backend cycles idle (75.05%) - 13,716,235,420 instructions:u # 3.42 insn per cycle - # 0.03 stalled cycles per insn (75.05%) - 1.263384384 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97728) (avx2: 0) (512y: 0) (512z: 0) + 4,572,009,891 cycles # 2.850 GHz + 13,755,353,111 instructions # 3.01 insn per cycle + 1.605120576 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847957e-03 -Avg ME (F77/C++) = 9.8479574833965355E-003 -Relative difference = 4.9085971470122835e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896225560E-003 +Relative difference = 3.151694379513441e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.021642e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.021902e+03 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.021902e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.522051 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.585961e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.587683e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.587683e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.806516 sec INFO: No Floating Point Exceptions have been reported - 1,798,107,812 cycles:u # 3.421 GHz (74.15%) - 427,486 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.44%) - 146,647,209 stalled-cycles-backend:u # 8.16% backend cycles idle (75.20%) - 4,777,320,458 instructions:u # 2.66 insn per cycle - # 0.03 stalled cycles per insn (75.65%) - 0.528489057 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84793) (512y: 0) (512z: 0) + 2,141,220,761 cycles # 2.644 GHz + 4,818,439,860 instructions # 2.25 insn per cycle + 0.810543510 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161012351E-003 -Relative difference = 1.8588827066662492e-08 +Avg ME (F77/C++) = 9.8929728161070967E-003 +Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.583228e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.585349e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.585349e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.701072 sec +INFO: 
No Floating Point Exceptions have been reported + 1,870,651,613 cycles # 2.656 GHz + 4,275,203,774 instructions # 2.29 insn per cycle + 0.705038579 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161070967E-003 +Relative difference = 1.8588234562202478e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.570140e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.572065e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.572065e+02 ) 
sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.808794 sec +INFO: No Floating Point Exceptions have been reported + 1,356,929,556 cycles # 1.671 GHz + 2,164,613,956 instructions # 1.60 insn per cycle + 0.812781092 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3487) (512y: 34) (512z:79499) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982955140E-003 +Relative difference = 2.0044060904369713e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 7994fb35b0..dfab5870bc 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_16:06:18 +DATE: 2024-05-16_14:47:13 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.408577e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.414468e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.414507e+01 ) sec^-1 -MeanMatrixElemValue = ( 3.297256e-04 +- 2.011325e-04 ) GeV^-6 -TOTAL : 9.956495 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.689455e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.689959e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.690212e+02 ) sec^-1 +MeanMatrixElemValue = ( 
1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.179203 sec INFO: No Floating Point Exceptions have been reported - 33,567,482,139 cycles:u # 3.371 GHz (74.95%) - 3,460,523 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) - 6,543,761 stalled-cycles-backend:u # 0.02% backend cycles idle (75.03%) - 26,527,364,853 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.07%) - 10.022352268 seconds time elapsed + 7,126,539,551 cycles # 2.849 GHz + 15,807,759,758 instructions # 2.22 insn per cycle + 2.559686036 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.292206e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.295533e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.295553e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 -TOTAL : 9.372690 sec +EvtsPerSec[Rmb+ME] (23) = ( 
1.107889e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108154e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108192e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.424856 sec INFO: No Floating Point Exceptions have been reported - 31,571,554,884 cycles:u # 3.365 GHz (74.99%) - 3,356,337 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 6,949,101 stalled-cycles-backend:u # 0.02% backend cycles idle (75.02%) - 24,987,178,999 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 9.419617214 seconds time elapsed + 10,729,635,772 cycles # 2.852 GHz + 25,204,058,412 instructions # 2.35 insn per cycle + 3.820430433 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656533E-003 -Relative difference = 3.138524921691728e-07 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.913091e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.913361e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.913361e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.328063 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.303415e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.303607e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.303607e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.238072 sec INFO: No Floating Point Exceptions have been reported - 18,090,484,403 cycles:u # 3.393 GHz (74.95%) - 31,419,411 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.94%) - 2,281,933,190 stalled-cycles-backend:u # 12.61% backend cycles idle (74.94%) - 55,395,326,342 instructions:u # 3.06 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 5.335013340 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44898) (avx2: 0) (512y: 0) (512z: 0) + 19,150,406,884 cycles # 2.645 GHz + 54,154,394,762 instructions # 2.83 insn per cycle + 7.242308052 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32067) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.319058e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.319196e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.319196e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 2.281605 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.497154e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.497235e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.497235e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.531757 sec INFO: No Floating Point Exceptions have been reported - 7,899,102,195 cycles:u # 3.457 GHz (74.80%) - 583,779 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.87%) - 790,863,753 stalled-cycles-backend:u # 10.01% backend cycles idle (75.04%) - 25,844,357,323 instructions:u # 3.27 insn per cycle - # 0.03 stalled cycles per insn (75.15%) - 2.288299376 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96804) (avx2: 0) (512y: 0) (512z: 0) + 9,343,938,644 cycles # 2.643 GHz + 26,158,830,842 instructions # 2.80 insn per cycle + 3.535758073 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96007) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 5.387508e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.388294e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.388294e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 0.986063 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.453828e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.454275e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.454275e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.534198 sec INFO: No Floating Point Exceptions have been reported - 3,393,191,576 cycles:u # 3.428 GHz (75.01%) - 1,102,520 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.95%) - 272,550,314 stalled-cycles-backend:u # 8.03% backend cycles idle (74.95%) - 9,067,112,205 instructions:u # 2.67 insn per cycle - # 0.03 stalled cycles per insn (74.95%) - 0.992852344 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83820) (512y: 0) (512z: 0) + 4,069,691,610 cycles # 2.648 GHz + 9,228,168,046 instructions # 2.27 insn per cycle + 1.538179495 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.986335e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.986931e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.986931e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.329366 sec +INFO: No Floating Point Exceptions have been reported + 3,528,184,184 cycles # 2.647 GHz + 
8,174,614,993 instructions # 2.32 insn per cycle + 1.333451918 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.367967e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.368468e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.368468e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.573046 sec 
+INFO: No Floating Point Exceptions have been reported + 2,618,946,865 cycles # 1.661 GHz + 4,154,480,374 instructions # 1.59 insn per cycle + 1.577054610 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2046) (512y: 93) (512z:78760) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 8c8684e3d6..3ddfb4805b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-05-16_16:07:36 +DATE: 2024-05-16_14:48:06 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.512212e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.517450e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.517493e+01 ) sec^-1 -MeanMatrixElemValue = ( 3.297256e-04 +- 2.011325e-04 ) GeV^-6 -TOTAL : 9.863561 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.679279e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.679786e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.680054e+02 ) sec^-1 +MeanMatrixElemValue = ( 
1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.178048 sec INFO: No Floating Point Exceptions have been reported - 33,411,525,809 cycles:u # 3.381 GHz (75.00%) - 3,395,895 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 6,487,254 stalled-cycles-backend:u # 0.02% backend cycles idle (75.05%) - 26,457,448,109 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 9.914327052 seconds time elapsed + 7,134,880,740 cycles # 2.851 GHz + 15,434,594,866 instructions # 2.16 insn per cycle + 2.558453633 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.303149e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.306076e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.306096e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.973353e-04 +- 5.853892e-04 ) GeV^-6 -TOTAL : 9.350448 sec +EvtsPerSec[Rmb+ME] (23) = ( 
1.104221e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.104483e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.104525e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.411600 sec INFO: No Floating Point Exceptions have been reported - 30,640,968,158 cycles:u # 3.271 GHz (74.99%) - 3,457,860 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) - 7,072,147 stalled-cycles-backend:u # 0.02% backend cycles idle (74.98%) - 24,241,249,676 instructions:u # 0.79 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 9.396296167 seconds time elapsed + 10,672,973,002 cycles # 2.855 GHz + 24,521,846,399 instructions # 2.30 insn per cycle + 3.794724712 seconds time elapsed ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656533E-003 -Relative difference = 3.138524921691728e-07 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.991963e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.992233e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.992233e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.286309 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.893217e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.893439e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.893439e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.694276 sec INFO: No Floating Point Exceptions have been reported - 18,200,100,367 cycles:u # 3.441 GHz (74.92%) - 31,693,487 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.99%) - 2,173,708,240 stalled-cycles-backend:u # 11.94% backend cycles idle (75.05%) - 55,300,519,357 instructions:u # 3.04 insn per cycle - # 0.04 stalled cycles per insn (75.05%) - 5.293712357 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44806) (avx2: 0) (512y: 0) (512z: 0) + 19,121,414,788 cycles # 2.855 GHz + 54,156,458,090 instructions # 2.83 insn per cycle + 6.698138270 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32244) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.042520e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.042641e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.042641e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 2.590307 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.495395e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.495480e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.495480e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.535528 sec INFO: No Floating Point Exceptions have been reported - 7,993,267,480 cycles:u # 3.081 GHz (75.02%) - 2,354,729 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.02%) - 788,725,170 stalled-cycles-backend:u # 9.87% backend cycles idle (75.02%) - 25,756,603,572 instructions:u # 3.22 insn per cycle - # 0.03 stalled cycles per insn (75.02%) - 2.597135056 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96765) (avx2: 0) (512y: 0) (512z: 0) + 9,391,010,006 cycles # 2.654 GHz + 26,079,707,862 instructions # 2.78 insn per cycle + 3.539600596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95901) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.797254e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.797903e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.797903e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 
-TOTAL : 1.106761 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.518532e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.518969e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.518969e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.505616 sec INFO: No Floating Point Exceptions have been reported - 3,391,654,110 cycles:u # 3.054 GHz (74.79%) - 718,366 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.79%) - 300,486,852 stalled-cycles-backend:u # 8.86% backend cycles idle (74.79%) - 9,091,654,928 instructions:u # 2.68 insn per cycle - # 0.03 stalled cycles per insn (75.10%) - 1.113615121 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83378) (512y: 0) (512z: 0) + 4,001,150,405 cycles # 2.652 GHz + 9,212,868,850 instructions # 2.30 insn per cycle + 1.509560632 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.985927e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.986486e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.986486e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.329709 sec +INFO: No Floating Point Exceptions have been reported + 3,529,740,112 cycles # 2.648 GHz + 
8,168,252,869 instructions # 2.31 insn per cycle + 1.333651402 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.359545e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.360045e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.360045e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.576721 
sec +INFO: No Floating Point Exceptions have been reported + 2,623,702,370 cycles # 1.660 GHz + 4,153,356,804 instructions # 1.58 insn per cycle + 1.580839869 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1493) (512y: 175) (512z:78776) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 188bf1165b..38bc670a18 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_16:01:39 +DATE: 2024-05-16_14:42:48 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 713,963,187 cycles:u # 2.170 GHz (73.28%) - 2,042,187 stalled-cycles-frontend:u # 0.29% frontend cycles idle (76.83%) - 5,187,442 stalled-cycles-backend:u # 0.73% backend cycles idle (75.80%) - 1,231,009,265 instructions:u # 1.72 insn per cycle - # 0.00 stalled cycles per insn (76.48%) - 0.407439823 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.927387e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.315718e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.634653e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.452451 sec +INFO: No Floating Point Exceptions have been reported + 1,879,085,625 cycles # 2.815 GHz + 2,632,406,951 instructions # 1.40 insn per cycle + 0.724903288 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 896,221,881 cycles:u # 2.079 GHz (75.90%) - 2,055,660 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.90%) - 5,691,868 stalled-cycles-backend:u # 0.64% backend cycles idle (75.92%) - 1,375,853,093 instructions:u # 1.54 insn per cycle - # 0.00 stalled cycles per insn (75.91%) - 0.493613360 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.675522e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.208336e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.557322e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 
2.116328e+02 ) GeV^-2 +TOTAL : 0.534593 sec +INFO: No Floating Point Exceptions have been reported + 2,165,913,457 cycles # 2.812 GHz + 3,139,398,529 instructions # 1.45 insn per cycle + 0.827804422 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.011560e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033153e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033153e+05 ) sec^-1 
+MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.642980 sec +INFO: No Floating Point Exceptions have been reported + 4,710,402,412 cycles # 2.861 GHz + 13,462,495,012 instructions # 2.86 insn per cycle + 1.647108070 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe -Memory access fault by GPU node-4 (Agent handle: 0x6026a0) on address 0x14f44de7a000. Reason: Unknown. +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499484 +Relative difference = 5.286896509487005e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 1.839775e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910542e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910542e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.913158 sec +INFO: No Floating Point Exceptions have been reported + 2,620,816,977 cycles # 2.859 GHz + 7,551,970,333 instructions # 2.88 insn per cycle + 0.917276709 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499478 +Relative difference = 5.28689651338321e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.970408e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.156692e+05 
) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.156692e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.574933 sec +INFO: No Floating Point Exceptions have been reported + 1,480,758,822 cycles # 2.560 GHz + 3,119,703,419 instructions # 2.11 insn per cycle + 0.579132992 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.428889e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.682851e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.682851e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.501020 sec +INFO: No Floating Point Exceptions have been reported + 1,347,520,276 cycles # 2.670 GHz + 2,981,434,055 instructions # 2.21 insn per cycle + 0.505363497 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.241546e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347861e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347861e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.756225 sec +INFO: No Floating Point 
Exceptions have been reported + 1,330,320,612 cycles # 1.751 GHz + 1,953,406,018 instructions # 1.47 insn per cycle + 0.760489864 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index e662317ff0..6f141963c0 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,78 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_16:28:10 +DATE: 2024-05-16_15:02:27 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 769,673,769 cycles:u # 2.223 GHz (74.48%) - 2,560,023 stalled-cycles-frontend:u # 0.33% frontend cycles idle (74.72%) - 28,792,356 stalled-cycles-backend:u # 3.74% backend cycles idle (75.65%) - 1,230,708,006 instructions:u # 1.60 insn per cycle - # 0.02 stalled cycles per insn (77.63%) - 0.415541760 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.428295e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.103056e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.103056e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.480636 sec +INFO: No Floating Point Exceptions have been reported + 1,959,891,585 cycles # 2.818 GHz + 2,927,619,706 instructions # 1.49 insn per cycle + 0.752080667 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 2,975,700,076 cycles:u # 2.756 GHz (75.87%) - 16,209,947 stalled-cycles-frontend:u # 0.54% frontend cycles idle (75.56%) - 848,638,694 stalled-cycles-backend:u # 28.52% backend cycles idle (75.54%) - 3,194,109,069 instructions:u # 1.07 insn per cycle - # 0.27 stalled cycles per insn (75.52%) - 1.140659477 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.157968e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.371122e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.371122e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.764001 sec +INFO: No Floating Point Exceptions have been reported + 2,873,640,599 cycles # 2.829 GHz + 4,407,079,803 instructions # 1.53 insn per cycle + 
1.073816079 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.008642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.030371e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.030371e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.654853 sec +INFO: No Floating Point Exceptions have been reported + 4,747,034,662 cycles # 2.862 GHz + 13,469,694,473 instructions # 2.84 insn per cycle + 1.659302078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499484 +Relative difference = 5.286896509487005e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.820348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.892211e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.892211e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.930951 sec +INFO: No Floating Point Exceptions have been reported + 2,665,977,292 cycles # 2.852 GHz + 7,601,998,240 instructions # 2.85 insn per cycle + 0.935555380 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe -Memory access fault by GPU node-4 (Agent handle: 0x6026a0) on address 0x14c1f811a000. Reason: Unknown. 
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499478 +Relative difference = 5.28689651338321e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.068620e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.272960e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.272960e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.564348 sec +INFO: No Floating Point Exceptions have been reported + 1,513,664,570 cycles # 2.669 GHz + 3,168,463,518 instructions # 2.09 insn per cycle + 0.568761168 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.408389e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.655047e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.655047e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.511104 sec +INFO: No Floating Point Exceptions have been reported + 1,377,582,779 cycles # 2.675 GHz + 3,030,644,125 instructions # 2.20 insn per cycle + 0.515560343 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.221799e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.329402e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.329402e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.769820 sec +INFO: No Floating Point Exceptions have been reported + 1,366,102,927 cycles # 1.765 GHz + 1,991,071,116 instructions # 1.46 insn per cycle + 0.774386560 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index d10e43658c..25b8d3c885 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_16:01:48 +DATE: 2024-05-16_14:43:01 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault - 722,050,915 cycles:u # 2.167 GHz (72.82%) - 2,092,789 stalled-cycles-frontend:u # 0.29% frontend cycles idle (77.11%) - 4,947,015 stalled-cycles-backend:u # 0.69% backend cycles idle (76.05%) - 1,226,010,153 instructions:u # 1.70 insn per cycle - # 0.00 stalled cycles per insn (76.56%) - 0.389591226 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.907170e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.197971e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.504611e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.455771 sec +INFO: No Floating Point Exceptions have been reported + 1,881,865,516 cycles # 2.813 GHz + 2,669,782,801 instructions # 1.42 insn per cycle + 0.727786761 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault - 923,029,168 cycles:u # 2.154 GHz (74.77%) - 2,266,174 stalled-cycles-frontend:u # 0.25% frontend cycles idle (72.08%) - 4,971,550 stalled-cycles-backend:u # 0.54% backend cycles idle (74.17%) - 1,357,666,646 instructions:u # 1.47 insn per cycle - # 0.00 stalled cycles per insn (76.61%) - 0.481792171 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.641992e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.081273e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.416654e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 
2.116328e+02 ) GeV^-2 +TOTAL : 0.532301 sec +INFO: No Floating Point Exceptions have been reported + 2,167,822,822 cycles # 2.823 GHz + 3,120,353,321 instructions # 1.44 insn per cycle + 0.825343283 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.007784e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.029112e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.029112e+05 ) sec^-1 
+MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.648995 sec +INFO: No Floating Point Exceptions have been reported + 4,725,323,359 cycles # 2.860 GHz + 13,457,369,308 instructions # 2.85 insn per cycle + 1.653142214 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/runTest_hip.exe -Memory access fault by GPU node-4 (Agent handle: 0x6015f0) on address 0x14c2a6b19000. Reason: Unknown. +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499484 +Relative difference = 5.286896509487005e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 1.833913e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.904030e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.904030e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.915889 sec +INFO: No Floating Point Exceptions have been reported + 2,628,184,982 cycles # 2.858 GHz + 7,551,273,836 instructions # 2.87 insn per cycle + 0.920086997 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499478 +Relative difference = 5.28689651338321e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.116183e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.320457e+05 
) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.320457e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.548674 sec +INFO: No Floating Point Exceptions have been reported + 1,476,841,675 cycles # 2.675 GHz + 3,117,924,257 instructions # 2.11 insn per cycle + 0.552738607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.456247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.706124e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.706124e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.496887 sec +INFO: No Floating Point Exceptions have been reported + 1,340,057,166 cycles # 2.677 GHz + 2,978,732,248 instructions # 2.22 insn per cycle + 0.501058940 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.241283e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347840e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347840e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.757118 sec +INFO: No Floating Point 
Exceptions have been reported + 1,329,966,748 cycles # 1.749 GHz + 1,951,787,640 instructions # 1.47 insn per cycle + 0.761356492 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 6f72dff6a1..88eaa7d80d 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_16:01:51 +DATE: 2024-05-16_14:43:15 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 739,138,184 cycles:u # 2.272 GHz (70.20%) - 2,203,118 stalled-cycles-frontend:u # 0.30% frontend cycles idle (73.01%) - 4,809,093 stalled-cycles-backend:u # 0.65% backend cycles idle (75.72%) - 1,197,836,797 instructions:u # 1.62 insn per cycle - # 0.00 stalled cycles per insn (75.47%) - 0.379451473 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.867335e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.223690e+08 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.343650e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.450195 sec +INFO: No Floating Point Exceptions have been reported + 1,886,543,936 cycles # 2.814 GHz + 2,627,629,254 instructions # 1.39 insn per cycle + 0.729554150 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 806,619,492 cycles:u # 2.156 GHz (75.32%) - 2,155,748 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.43%) - 5,351,844 stalled-cycles-backend:u # 0.66% backend cycles idle (75.37%) - 1,293,687,319 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (74.61%) - 0.410509600 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.183442e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.842494e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962990e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.571361e+02 +- 
2.114021e+02 ) GeV^-2 +TOTAL : 0.482910 sec +INFO: No Floating Point Exceptions have been reported + 1,994,696,147 cycles # 2.812 GHz + 2,828,466,882 instructions # 1.42 insn per cycle + 0.766894337 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.069532e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.093791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.093791e+05 ) sec^-1 
+MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.553490 sec +INFO: No Floating Point Exceptions have been reported + 4,455,366,971 cycles # 2.862 GHz + 13,047,769,817 instructions # 2.93 insn per cycle + 1.557630020 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe -Memory access fault by GPU node-4 (Agent handle: 0x5ee6a0) on address 0x148dbe8a5000. Reason: Unknown. +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246857540270419 +Relative difference = 1.7265064590569047e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 2.869084e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.052765e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.052765e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.592151 sec +INFO: No Floating Point Exceptions have been reported + 1,701,146,602 cycles # 2.856 GHz + 4,512,165,265 instructions # 2.65 insn per cycle + 0.596248693 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246859631675157 +Relative difference = 2.5853054135974944e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.609679e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
6.315056e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.315056e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.313413 sec +INFO: No Floating Point Exceptions have been reported + 850,737,642 cycles # 2.684 GHz + 1,895,945,890 instructions # 2.23 insn per cycle + 0.317546154 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.973396e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.785303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.785303e+05 ) sec^-1 
+MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.295859 sec +INFO: No Floating Point Exceptions have been reported + 801,819,935 cycles # 2.679 GHz + 1,819,229,849 instructions # 2.27 insn per cycle + 0.299944027 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.354956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.770974e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.770974e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.400076 sec +INFO: No 
Floating Point Exceptions have been reported + 733,009,701 cycles # 1.817 GHz + 1,304,250,799 instructions # 1.78 insn per cycle + 0.404216975 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489383243206 +Relative difference = 4.32888033512879e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index b1ad2e06ab..b62a8a0309 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,78 +1,250 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_16:28:15 +DATE: 2024-05-16_15:02:40 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 744,907,152 cycles:u # 2.134 GHz (76.47%) - 2,619,285 stalled-cycles-frontend:u # 0.35% frontend cycles idle (75.14%) - 21,668,084 stalled-cycles-backend:u # 2.91% backend cycles idle (74.88%) - 1,202,893,963 instructions:u # 1.61 insn per cycle - # 0.02 stalled cycles per insn (75.25%) - 0.428107332 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.337579e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.030007e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.030007e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 +TOTAL : 0.462122 sec +INFO: No Floating Point Exceptions have been reported + 1,901,719,201 cycles # 2.816 GHz + 2,811,032,752 instructions # 1.48 insn per cycle + 0.731978994 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 2,924,404,172 cycles:u # 2.888 GHz (73.83%) - 17,608,008 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.79%) - 854,434,104 stalled-cycles-backend:u # 29.22% backend cycles idle (75.54%) - 3,082,375,053 instructions:u # 1.05 insn per cycle - # 0.28 stalled cycles per insn (75.24%) - 1.041915815 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.907303e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.566216e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.566216e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 +TOTAL : 0.637098 sec +INFO: No Floating Point Exceptions have been reported + 2,459,040,544 cycles # 2.824 GHz + 3,715,271,980 instructions # 1.51 insn per cycle + 
0.927773682 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.068152e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092782e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092782e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.559403 sec +INFO: No Floating Point Exceptions have been reported + 4,475,912,555 cycles # 2.864 GHz + 13,052,235,712 instructions # 2.92 insn per cycle + 1.563691095 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246857540270419 +Relative difference = 1.7265064590569047e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.856394e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.039884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.039884e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.599599 sec +INFO: No Floating Point Exceptions have been reported + 1,723,185,860 cycles # 2.856 GHz + 4,560,285,596 instructions # 2.65 insn per cycle + 0.603925442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3600) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe -Memory access fault by GPU node-4 (Agent handle: 0x5ee6a0) on address 0x1531a427d000. Reason: Unknown. 
+cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246859631675157 +Relative difference = 2.5853054135974944e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.545801e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.241062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.241062e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.321141 sec +INFO: No Floating Point Exceptions have been reported + 871,513,310 cycles # 2.683 GHz + 1,932,959,243 instructions # 2.22 insn per cycle + 0.325378385 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.891127e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.696072e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.696072e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.304221 sec +INFO: No Floating Point Exceptions have been reported + 825,995,486 cycles # 2.683 GHz + 1,856,161,781 instructions # 2.25 insn per cycle + 0.308416114 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.307702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.720545e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.720545e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.408443 sec +INFO: No Floating Point Exceptions have been reported + 755,445,387 cycles # 1.833 GHz + 1,345,989,570 instructions # 1.78 insn per cycle + 0.412779323 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1973) (512y: 32) (512z: 2382) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489383243206 +Relative difference = 4.32888033512879e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 3fa0b52721..f782cd39a5 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_16:01:55 +DATE: 2024-05-16_14:43:27 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault - 722,785,633 cycles:u # 1.960 GHz (75.07%) - 2,293,673 stalled-cycles-frontend:u # 0.32% frontend cycles idle (70.85%) - 6,301,391 stalled-cycles-backend:u # 0.87% backend cycles idle (73.24%) - 1,209,125,825 instructions:u # 1.67 insn per cycle - # 0.01 stalled cycles per insn (77.15%) - 0.415465911 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.882997e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.225822e+08 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.344729e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.447687 sec +INFO: No Floating Point Exceptions have been reported + 1,891,564,072 cycles # 2.816 GHz + 2,660,739,786 instructions # 1.41 insn per cycle + 0.729746219 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault - 789,672,018 cycles:u # 1.841 GHz (75.32%) - 2,108,026 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.87%) - 5,194,088 stalled-cycles-backend:u # 0.66% backend cycles idle (76.73%) - 1,297,221,444 instructions:u # 1.64 insn per cycle - # 0.00 stalled cycles per insn (75.85%) - 0.514680463 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.107850e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.805980e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.921999e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.571361e+02 +- 
2.114021e+02 ) GeV^-2 +TOTAL : 0.485790 sec +INFO: No Floating Point Exceptions have been reported + 1,996,906,378 cycles # 2.807 GHz + 2,867,667,096 instructions # 1.44 insn per cycle + 0.769333150 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.069812e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.094168e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.094168e+05 ) sec^-1 
+MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.553096 sec +INFO: No Floating Point Exceptions have been reported + 4,454,505,799 cycles # 2.862 GHz + 13,029,391,838 instructions # 2.92 insn per cycle + 1.557292510 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/runTest_hip.exe -Memory access fault by GPU node-4 (Agent handle: 0x5ed5f0) on address 0x14ca6eee4000. Reason: Unknown. +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246857540270419 +Relative difference = 1.7265064590569047e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 2.876347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.060596e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.060596e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.590255 sec +INFO: No Floating Point Exceptions have been reported + 1,693,495,983 cycles # 2.852 GHz + 4,508,141,451 instructions # 2.66 insn per cycle + 0.594398488 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3588) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246859631675157 +Relative difference = 2.5853054135974944e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.574680e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
6.273652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.273652e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.315080 sec +INFO: No Floating Point Exceptions have been reported + 851,359,645 cycles # 2.672 GHz + 1,893,112,803 instructions # 2.22 insn per cycle + 0.319204462 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.978403e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.785893e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.785893e+05 ) sec^-1 
+MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.295093 sec +INFO: No Floating Point Exceptions have been reported + 799,712,323 cycles # 2.678 GHz + 1,814,979,638 instructions # 2.27 insn per cycle + 0.299228201 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.317992e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.737735e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.737735e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.402763 sec +INFO: No 
Floating Point Exceptions have been reported + 736,511,578 cycles # 1.812 GHz + 1,302,115,541 instructions # 1.77 insn per cycle + 0.406867415 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1936) (512y: 32) (512z: 2382) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489383243206 +Relative difference = 4.32888033512879e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index cb57e418df..265a4a7626 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_16:01:59 +DATE: 2024-05-16_14:43:39 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault - 708,194,008 cycles:u # 1.920 GHz (75.96%) - 2,073,803 stalled-cycles-frontend:u # 0.29% frontend cycles idle (76.55%) - 6,021,706 stalled-cycles-backend:u # 0.85% backend cycles idle (74.51%) - 1,268,817,780 instructions:u # 1.79 insn per cycle - # 0.00 stalled cycles per insn (72.13%) - 0.408351550 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.940149e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.336219e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.662963e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.452840 sec +INFO: No Floating Point Exceptions have been reported + 1,880,363,198 cycles # 2.808 GHz + 2,677,692,820 instructions # 1.42 insn per cycle + 0.726161506 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault - 905,656,386 cycles:u # 1.899 GHz (74.39%) - 2,346,083 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.27%) - 5,205,158 stalled-cycles-backend:u # 0.57% backend cycles idle (73.59%) - 1,403,918,632 instructions:u # 1.55 insn per cycle - # 0.00 stalled cycles per insn (73.04%) - 0.502193913 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.684159e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.236315e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.588311e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 
2.116328e+02 ) GeV^-2 +TOTAL : 0.530728 sec +INFO: No Floating Point Exceptions have been reported + 2,164,642,485 cycles # 2.821 GHz + 3,145,530,012 instructions # 1.45 insn per cycle + 0.824333778 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.003476e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.024445e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.024445e+05 ) sec^-1 
+MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.655769 sec +INFO: No Floating Point Exceptions have been reported + 4,745,491,139 cycles # 2.860 GHz + 13,466,039,366 instructions # 2.84 insn per cycle + 1.659848552 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/runTest_hip.exe -Memory access fault by GPU node-4 (Agent handle: 0x6026a0) on address 0x14b91ff9a000. Reason: Unknown. +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 1.849332e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.920343e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.920343e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.908133 sec +INFO: No Floating Point Exceptions have been reported + 2,605,721,632 cycles # 2.858 GHz + 7,384,650,569 instructions # 2.83 insn per cycle + 0.912227813 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.133010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.340359e+05 
) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340359e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.546033 sec +INFO: No Floating Point Exceptions have been reported + 1,469,888,298 cycles # 2.674 GHz + 3,055,461,884 instructions # 2.08 insn per cycle + 0.550169150 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.544324e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.807645e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.807645e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.485213 sec +INFO: No Floating Point Exceptions have been reported + 1,307,959,720 cycles # 2.676 GHz + 2,930,377,532 instructions # 2.24 insn per cycle + 0.489382978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.172350e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.272043e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.272043e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.779592 sec +INFO: No Floating Point 
Exceptions have been reported + 1,368,592,699 cycles # 1.747 GHz + 1,969,378,714 instructions # 1.44 insn per cycle + 0.783958712 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 437a700155..84e80111cc 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,70 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-05-16_16:02:03 +DATE: 2024-05-16_14:43:53 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault - 705,570,664 cycles:u # 1.900 GHz (76.20%) - 2,241,490 stalled-cycles-frontend:u # 0.32% frontend cycles idle (72.84%) - 6,208,242 stalled-cycles-backend:u # 0.88% backend cycles idle (71.13%) - 1,273,792,982 instructions:u # 1.81 insn per cycle - # 0.00 stalled cycles per insn (74.02%) - 0.428782940 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.890956e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.181054e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.513059e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.453973 sec +INFO: No Floating Point Exceptions have been reported + 1,876,167,670 cycles # 2.808 GHz + 2,662,885,558 instructions # 1.42 insn per cycle + 0.726739496 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault - 901,184,138 cycles:u # 1.897 GHz (74.40%) - 2,252,378 stalled-cycles-frontend:u # 0.25% frontend cycles idle (73.94%) - 5,221,412 stalled-cycles-backend:u # 0.58% backend cycles idle (73.24%) - 1,363,461,109 instructions:u # 1.51 insn per cycle - # 0.00 stalled cycles per insn (75.20%) - 0.512631423 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.642147e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.081360e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.416296e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 
2.116328e+02 ) GeV^-2 +TOTAL : 0.533908 sec +INFO: No Floating Point Exceptions have been reported + 2,163,893,097 cycles # 2.818 GHz + 3,132,561,280 instructions # 1.45 insn per cycle + 0.826852700 seconds time elapsed +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.007176e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.028375e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.028375e+05 ) sec^-1 
+MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.650928 sec +INFO: No Floating Point Exceptions have been reported + 4,733,031,285 cycles # 2.861 GHz + 13,451,191,160 instructions # 2.84 insn per cycle + 1.655053441 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/runTest_hip.exe -Memory access fault by GPU node-4 (Agent handle: 0x6015f0) on address 0x153235909000. Reason: Unknown. +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 1.847760e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.919370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.919370e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.909156 sec +INFO: No Floating Point Exceptions have been reported + 2,606,818,939 cycles # 2.857 GHz + 7,388,977,556 instructions # 2.83 insn per cycle + 0.913243210 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.915489e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.093943e+05 
) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.093943e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.585235 sec +INFO: No Floating Point Exceptions have been reported + 1,469,957,671 cycles # 2.496 GHz + 3,055,084,256 instructions # 2.08 insn per cycle + 0.589443028 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.535422e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.797003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.797003e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.486063 sec +INFO: No Floating Point Exceptions have been reported + 1,306,700,125 cycles # 2.669 GHz + 2,930,583,524 instructions # 2.24 insn per cycle + 0.490171496 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.173668e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.273111e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.273111e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.778991 sec +INFO: No Floating Point 
Exceptions have been reported + 1,367,910,665 cycles # 1.749 GHz + 1,969,371,455 instructions # 1.44 insn per cycle + 0.783143035 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 82a9bd67cc..8af6873425 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_16:48:00 +DATE: 2024-05-16_15:20:33 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.585455e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.456806e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.498470e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.397875 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.588343e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.081541e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.176224e+08 ) sec^-1 +MeanMatrixElemValue = ( 
7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.528808 sec INFO: No Floating Point Exceptions have been reported - 943,758,789 cycles:u # 2.286 GHz (73.85%) - 2,179,631 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.36%) - 5,460,392 stalled-cycles-backend:u # 0.58% backend cycles idle (74.84%) - 1,513,257,999 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (74.84%) - 0.452944684 seconds time elapsed + 2,192,111,166 cycles # 2.821 GHz + 3,135,008,318 instructions # 1.43 insn per cycle + 0.833908791 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110271 -Relative difference = 2.1036162350152416e-07 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.306069e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.360222e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.360222e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.737139 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.865233e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.915227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.915227e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 5.734356 sec INFO: No Floating Point Exceptions have been reported - 16,137,200,364 cycles:u # 3.397 GHz (74.92%) - 9,389,080 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.91%) - 3,553,701,667 stalled-cycles-backend:u # 22.02% backend cycles idle (74.98%) - 41,552,302,338 instructions:u # 2.57 insn per cycle - # 0.09 stalled cycles per insn (75.06%) - 4.754746528 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 16,430,057,220 cycles # 2.863 GHz + 42,484,854,801 instructions # 2.59 insn per cycle + 5.739849036 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 711) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.027570e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.213591e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.213591e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
2.790838 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.235376e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.401567e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.401567e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.353450 sec INFO: No Floating Point Exceptions have been reported - 9,377,032,948 cycles:u # 3.344 GHz (74.92%) - 9,531,206 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.91%) - 1,834,437,945 stalled-cycles-backend:u # 19.56% backend cycles idle (74.91%) - 26,195,629,639 instructions:u # 2.79 insn per cycle - # 0.07 stalled cycles per insn (74.91%) - 2.808195918 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2294) (avx2: 0) (512y: 0) (512z: 0) + 9,612,345,009 cycles # 2.863 GHz + 26,317,248,003 instructions # 2.74 insn per cycle + 3.358813940 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2388) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.374241e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.917522e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.917522e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
1.612540 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.244474e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.678972e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.678972e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.111219 sec INFO: No Floating Point Exceptions have been reported - 5,293,962,555 cycles:u # 3.254 GHz (74.97%) - 7,444,911 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.92%) - 495,611,947 stalled-cycles-backend:u # 9.36% backend cycles idle (74.94%) - 12,323,263,372 instructions:u # 2.33 insn per cycle - # 0.04 stalled cycles per insn (74.94%) - 1.640715925 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2548) (512y: 0) (512z: 0) + 5,673,148,574 cycles # 2.682 GHz + 12,029,125,150 instructions # 2.12 insn per cycle + 2.116589548 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2532) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.759844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.282682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.282682e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 1.934603 sec +INFO: No Floating Point Exceptions have been reported + 5,185,525,755 cycles # 2.675 GHz + 11,158,849,555 
instructions # 2.15 insn per cycle + 1.940086470 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2195) (512y: 148) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.492671e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.676216e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.676216e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.114794 sec +INFO: No Floating 
Point Exceptions have been reported + 5,530,850,143 cycles # 1.773 GHz + 8,071,834,418 instructions # 1.46 insn per cycle + 3.120392658 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1471) (512y: 129) (512z: 1684) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 84e30da395..746b04ecac 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_16:48:14 +DATE: 2024-05-16_15:20:58 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.514011e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.380921e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.420383e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.386498 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.594523e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092654e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188255e+08 ) sec^-1 +MeanMatrixElemValue = ( 
7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.533303 sec INFO: No Floating Point Exceptions have been reported - 931,603,369 cycles:u # 2.249 GHz (74.32%) - 2,257,954 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.41%) - 5,850,332 stalled-cycles-backend:u # 0.63% backend cycles idle (74.41%) - 1,489,222,217 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (74.92%) - 0.442258532 seconds time elapsed + 2,159,610,833 cycles # 2.816 GHz + 3,095,961,302 instructions # 1.43 insn per cycle + 0.825364511 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110271 -Relative difference = 2.1036162350152416e-07 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.244015e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.296973e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.296973e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.865774 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.884407e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.935333e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935333e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 5.676327 sec INFO: No Floating Point Exceptions have been reported - 15,938,569,834 cycles:u # 3.266 GHz (74.93%) - 10,443,496 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.92%) - 28,554,178 stalled-cycles-backend:u # 0.18% backend cycles idle (74.97%) - 42,358,216,399 instructions:u # 2.66 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 4.884255117 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) + 16,262,813,557 cycles # 2.863 GHz + 43,266,807,177 instructions # 2.66 insn per cycle + 5.681729392 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.956447e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.139265e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.139265e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
2.845800 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.290556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.463505e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.463505e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.298765 sec INFO: No Floating Point Exceptions have been reported - 8,988,598,431 cycles:u # 3.142 GHz (74.84%) - 9,462,849 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) - 701,322,943 stalled-cycles-backend:u # 7.80% backend cycles idle (75.07%) - 24,974,362,465 instructions:u # 2.78 insn per cycle - # 0.03 stalled cycles per insn (75.12%) - 2.864819100 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2130) (avx2: 0) (512y: 0) (512z: 0) + 9,454,937,516 cycles # 2.862 GHz + 25,430,832,847 instructions # 2.69 insn per cycle + 3.304226277 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2268) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105804 Relative difference = 2.103617270732513e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.318500e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.728288e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.728288e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 
1.857120 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.695348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.042916e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.042916e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.345615 sec INFO: No Floating Point Exceptions have been reported - 5,877,401,974 cycles:u # 3.139 GHz (74.82%) - 8,272,614 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.80%) - 1,152,017,174 stalled-cycles-backend:u # 19.60% backend cycles idle (74.90%) - 13,570,647,489 instructions:u # 2.31 insn per cycle - # 0.08 stalled cycles per insn (75.11%) - 1.876681707 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2610) (512y: 0) (512z: 0) + 6,296,882,273 cycles # 2.679 GHz + 13,638,682,807 instructions # 2.17 insn per cycle + 2.351107442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2629) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.910957e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.286382e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.286382e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.246822 sec +INFO: No Floating Point Exceptions have been reported + 6,026,491,701 cycles # 2.677 GHz + 12,722,860,113 
instructions # 2.11 insn per cycle + 2.252413644 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2146) (512y: 296) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.420299e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.596534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.596534e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.177504 sec +INFO: No Floating 
Point Exceptions have been reported + 5,627,100,070 cycles # 1.769 GHz + 8,928,441,764 instructions # 1.59 insn per cycle + 3.183062200 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1357) (512y: 171) (512z: 1777) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index bbd4bac714..a9079e9716 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,181 +1,228 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_16:48:29 +DATE: 2024-05-16_15:21:23 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.439069e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.202389e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.293837e+08 ) sec^-1 -MeanMatrixElemValue = ( 6.834692e+00 +- 1.463624e-01 ) GeV^0 -TOTAL : 0.326772 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.566221e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.504693e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.775023e+08 ) sec^-1 +MeanMatrixElemValue = ( 
7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.485925 sec INFO: No Floating Point Exceptions have been reported - 780,945,559 cycles:u # 2.231 GHz (73.89%) - 2,133,802 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.05%) - 5,808,164 stalled-cycles-backend:u # 0.74% backend cycles idle (75.53%) - 1,289,335,778 instructions:u # 1.65 insn per cycle - # 0.00 stalled cycles per insn (76.77%) - 0.377797926 seconds time elapsed + 2,003,287,538 cycles # 2.816 GHz + 2,880,414,118 instructions # 1.44 insn per cycle + 0.769648039 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 4.313524e+00 -Avg ME (F77/GPU) = 4.3135525460820645 -Relative difference = 6.617809954082434e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695463908836 +Relative difference = 4.162439020000051e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.707985e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.781497e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.781497e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 4.025408 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.938364e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.994818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.994818e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 5.499884 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 13,828,819,171 cycles:u # 3.428 GHz (74.96%) - 17,085,911 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.02%) - 990,078,579 stalled-cycles-backend:u # 7.16% backend cycles idle (75.02%) - 40,682,624,063 instructions:u # 2.94 insn per cycle - # 0.02 stalled cycles per insn (75.02%) - 4.038704610 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 598) (avx2: 0) (512y: 0) (512z: 0) + 15,743,516,639 cycles # 2.861 GHz + 42,225,863,593 instructions # 2.68 insn per cycle + 5.505101290 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135737644042820 -Relative difference = 5.461728906135488e-08 +Avg ME (F77/C++) = 4.3135739049175754 +Relative difference = 2.2042608890083832e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` 
= 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.009062e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.395404e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.395404e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 1.896387 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.494085e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.834702e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.834702e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.423560 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,434,798,098 cycles:u # 3.377 GHz (74.85%) - 11,977,350 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.83%) - 2,484,080,589 stalled-cycles-backend:u # 38.60% backend cycles idle (74.83%) - 16,319,812,185 instructions:u # 2.54 insn per cycle - # 0.15 stalled cycles per insn (74.95%) - 1.911059116 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2735) (avx2: 0) (512y: 0) (512z: 0) + 6,948,197,620 cycles # 2.861 GHz + 16,919,710,710 instructions # 2.44 insn per cycle + 2.428887408 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2983) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313573e+00 -Avg ME (F77/C++) = 4.3135733148083091 -Relative difference = 7.298086973342306e-08 +Avg ME (C++/C++) = 4.313572e+00 +Avg ME (F77/C++) = 4.3135722205042839 +Relative difference = 5.111872113533787e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.169158e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.311982e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.311982e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289197e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.047632 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.820914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.816967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.816967e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.429543 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,453,398,445 cycles:u # 3.267 GHz (75.03%) - 9,191,807 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.03%) - 1,188,577,015 stalled-cycles-backend:u # 34.42% backend cycles idle (75.03%) - 8,026,521,272 instructions:u # 2.32 insn per cycle - # 0.15 stalled cycles per insn (75.02%) - 1.060765861 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3309) (512y: 0) (512z: 0) + 3,855,960,900 cycles # 2.689 GHz + 7,989,689,028 instructions # 2.07 insn per cycle + 1.434693752 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135650876211002 -Relative difference = 2.03129199623388e-08 +Avg ME (F77/C++) = 4.3135645699221641 +Relative difference = 9.97035713074993e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.282128e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.407558e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 9.407558e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.355217 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,662,603,190 cycles # 2.693 GHz + 7,491,885,625 instructions # 2.05 insn per cycle + 1.360533114 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3036) (512y: 23) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645699221641 +Relative difference = 9.97035713074993e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK 
+FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.072932e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.653576e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.653576e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.816585 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,322,287,385 cycles # 1.825 GHz + 5,988,754,595 instructions # 1.80 insn per cycle + 1.821834164 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2418) (512y: 32) (512z: 2031) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313564e+00 +Avg ME (F77/C++) = 4.3135643783025444 +Relative difference = 8.770069111236825e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 3c152fc040..0359df7b77 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,181 +1,228 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_16:48:41 +DATE: 2024-05-16_15:21:44 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = 
SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.028132e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.444735e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.556414e+08 ) sec^-1 -MeanMatrixElemValue = ( 6.834692e+00 +- 1.463624e-01 ) GeV^0 -TOTAL : 0.327398 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.575897e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.505600e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.778243e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.488373 sec INFO: No Floating Point Exceptions have been reported - 768,285,604 cycles:u # 2.176 GHz (74.98%) - 2,183,750 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.46%) - 4,903,137 stalled-cycles-backend:u # 0.64% backend cycles idle (75.41%) - 1,359,025,322 instructions:u # 1.77 insn per cycle - # 0.00 stalled cycles per insn (74.33%) - 0.379315567 seconds time elapsed + 2,007,752,645 cycles # 2.812 GHz + 2,828,437,251 instructions # 1.41 insn per cycle + 0.772837040 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 4.313524e+00 -Avg ME (F77/GPU) = 4.3135525460820645 -Relative difference = 6.617809954082434e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695463908836 +Relative difference = 4.162439020000051e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.701216e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.774331e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.774331e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 4.033796 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.991117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.050649e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.050649e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 5.356246 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 13,835,099,929 cycles:u # 3.422 GHz (74.94%) - 17,428,820 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.04%) - 484,816,878 stalled-cycles-backend:u # 3.50% backend cycles idle (75.07%) - 41,359,499,614 instructions:u # 2.99 insn per cycle - # 0.01 stalled cycles per insn (75.07%) - 4.047206130 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 555) (avx2: 0) (512y: 0) (512z: 0) + 15,339,535,429 cycles # 2.862 GHz + 42,474,905,629 instructions # 2.77 insn per cycle + 5.361339903 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 559) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135737563716248 -Relative difference = 5.647947044645654e-08 +Avg ME (F77/C++) = 4.3135739491553977 +Relative difference = 1.1787117204016727e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` 
= 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.839951e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.359656e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.359656e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 1.683941 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.134209e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.583662e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.583662e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.132369 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,586,160,818 cycles:u # 3.299 GHz (74.96%) - 12,340,652 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.96%) - 1,578,714,685 stalled-cycles-backend:u # 28.26% backend cycles idle (74.96%) - 16,066,756,783 instructions:u # 2.88 insn per cycle - # 0.10 stalled cycles per insn (74.97%) - 1.697057412 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2557) (avx2: 0) (512y: 0) (512z: 0) + 6,119,263,046 cycles # 2.864 GHz + 16,261,701,502 instructions # 2.66 insn per cycle + 2.137647028 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2702) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313573e+00 -Avg ME (F77/C++) = 4.3135733148083091 -Relative difference = 7.298086973342306e-08 +Avg ME (C++/C++) = 4.313572e+00 +Avg ME (F77/C++) = 4.3135722205042839 +Relative difference = 5.111872113533787e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 8.504699e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.235549e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.235549e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.289198e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.383506 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.498649e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.173623e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.173623e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.703269 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,638,091,860 cycles:u # 3.330 GHz (74.77%) - 9,866,921 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.75%) - 1,794,856,842 stalled-cycles-backend:u # 38.70% backend cycles idle (74.76%) - 10,093,674,408 instructions:u # 2.18 insn per cycle - # 0.18 stalled cycles per insn (75.02%) - 1.396837524 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3914) (512y: 0) (512z: 0) + 4,581,699,390 cycles # 2.683 GHz + 9,041,394,873 instructions # 1.97 insn per cycle + 1.708700782 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3558) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135650896001607 -Relative difference = 2.0771719231865954e-08 +Avg ME (F77/C++) = 4.3135645687580109 +Relative difference = 9.997345323075056e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.705142e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.424759e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 7.424759e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.652652 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 4,411,023,052 cycles # 2.662 GHz + 8,532,140,610 instructions # 1.93 insn per cycle + 1.658018216 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3311) (512y: 10) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645687580109 +Relative difference = 9.997345323075056e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK 
+FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.118773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.709641e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.709641e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.803301 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,302,699,675 cycles # 1.827 GHz + 5,958,419,273 instructions # 1.80 insn per cycle + 1.808538430 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 32) (512z: 2014) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313564e+00 +Avg ME (F77/C++) = 4.3135643783025444 +Relative difference = 8.770069111236825e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index e6690e6865..4345b3c851 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,105 +1,102 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_16:48:53 +DATE: 2024-05-16_15:22:04 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = 
SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.584227e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.454852e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497116e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.381980 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.596790e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.087710e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.182609e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.531791 sec INFO: No Floating Point Exceptions have been reported - 918,668,026 cycles:u # 2.236 GHz (75.67%) - 2,058,167 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.45%) - 5,134,418 stalled-cycles-backend:u # 0.56% backend cycles idle (74.72%) - 1,480,506,140 instructions:u # 1.61 insn per cycle - # 0.00 stalled cycles per insn (74.72%) - 0.439181912 seconds time elapsed + 2,158,479,665 cycles # 2.816 GHz + 3,115,947,911 instructions # 1.44 insn per cycle + 0.824595914 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 
2.327536e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.383060e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.383060e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.696016 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.739846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.783362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.783362e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.139062 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 16,143,011,940 cycles:u # 3.427 GHz (75.03%) - 38,177,717 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.04%) - 2,321,395,423 stalled-cycles-backend:u # 14.38% backend cycles idle (75.04%) - 41,577,988,005 instructions:u # 2.58 insn per cycle - # 0.06 stalled cycles per insn (75.04%) - 4.714763271 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 635) (avx2: 0) (512y: 0) (512z: 0) + 17,579,172,412 cycles # 2.862 GHz + 41,767,715,738 instructions # 2.38 insn per cycle + 6.144566394 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -107,32 +104,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.199625e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.399809e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.399809e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.686359 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.944235e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.080846e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.080846e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.674671 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,038,108,841 cycles:u # 3.346 GHz (74.87%) - 14,730,044 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.83%) - 1,762,459,713 stalled-cycles-backend:u # 19.50% backend cycles idle (74.93%) - 25,979,060,225 instructions:u # 2.87 insn per cycle - # 0.07 stalled cycles per insn (75.08%) - 2.704895398 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2393) (avx2: 0) (512y: 0) (512z: 0) + 10,157,870,701 cycles # 2.761 GHz + 26,355,211,403 instructions # 2.59 insn per cycle + 3.680088821 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2438) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -140,32 +134,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.420629e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.970166e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.970166e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.603532 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.512494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.830362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.830362e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.435332 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,323,273,363 cycles:u # 3.290 GHz (74.83%) - 18,468,340 stalled-cycles-frontend:u # 0.35% frontend cycles idle (74.78%) - 1,325,902,509 stalled-cycles-backend:u # 24.91% backend cycles idle (74.79%) - 12,235,378,310 instructions:u # 2.30 insn per cycle - # 0.11 stalled cycles per insn (75.00%) - 1.621743520 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2788) (512y: 0) (512z: 0) + 6,512,604,303 cycles # 2.669 GHz + 12,120,159,732 instructions # 1.86 insn per cycle + 2.440902409 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2718) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -173,9 +164,65 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.920988e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.300442e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.300442e+05 ) sec^-1 
+MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.244169 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,018,583,564 cycles # 2.676 GHz + 11,228,279,694 instructions # 1.87 insn per cycle + 2.249711111 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2369) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.148571e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.297302e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.297302e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.442171 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,072,730,798 cycles # 1.762 GHz + 8,215,005,190 instructions # 1.35 insn per cycle + 3.447734816 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1787) (512y: 134) (512z: 1755) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index d361db6bfb..fc67fec042 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ 
-1,105 +1,102 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-05-16_16:49:08 +DATE: 2024-05-16_15:22:31 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = 
SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.538104e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.390574e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.430013e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.379939 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.615689e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.096145e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.193163e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.527662 sec INFO: No Floating Point Exceptions have been reported - 915,401,628 cycles:u # 2.244 GHz (74.99%) - 2,223,644 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.39%) - 5,072,697 stalled-cycles-backend:u # 0.55% backend cycles idle (74.53%) - 1,494,510,160 instructions:u # 1.63 insn per cycle - # 0.00 stalled cycles per insn (74.53%) - 0.438042335 seconds time elapsed + 2,187,091,067 cycles # 2.822 GHz + 3,143,599,790 instructions # 1.44 insn per cycle + 0.831715891 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 
2.291427e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.346384e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.346384e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.765733 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.750132e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.794255e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.794255e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.103500 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 16,235,324,060 cycles:u # 3.396 GHz (74.90%) - 33,413,921 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.96%) - 65,906,967 stalled-cycles-backend:u # 0.41% backend cycles idle (75.04%) - 42,579,881,927 instructions:u # 2.62 insn per cycle - # 0.00 stalled cycles per insn (75.07%) - 4.784072749 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 577) (avx2: 0) (512y: 0) (512z: 0) + 17,473,867,626 cycles # 2.861 GHz + 43,052,630,037 instructions # 2.46 insn per cycle + 6.108967949 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 651) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -107,32 +104,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.139701e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.335093e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.335093e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.723584 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.176372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.336517e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.336517e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.414423 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,025,902,683 cycles:u # 3.296 GHz (74.94%) - 15,519,832 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.89%) - 1,026,034,829 stalled-cycles-backend:u # 11.37% backend cycles idle (74.88%) - 24,993,447,379 instructions:u # 2.77 insn per cycle - # 0.04 stalled cycles per insn (74.89%) - 2.741735540 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2226) (avx2: 0) (512y: 0) (512z: 0) + 9,783,940,024 cycles # 2.862 GHz + 25,167,910,576 instructions # 2.57 insn per cycle + 3.420037518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -140,32 +134,29 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.252836e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.647922e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.647922e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.867792 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.178030e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.451835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.451835e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.622185 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,114,690,879 cycles:u # 3.248 GHz (74.93%) - 18,332,899 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.93%) - 1,563,155,302 stalled-cycles-backend:u # 25.56% backend cycles idle (74.94%) - 13,635,238,163 instructions:u # 2.23 insn per cycle - # 0.11 stalled cycles per insn (74.94%) - 1.885955565 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2930) (512y: 0) (512z: 0) + 7,019,924,583 cycles # 2.672 GHz + 12,790,606,448 instructions # 1.82 insn per cycle + 2.627804246 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2699) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -173,9 +164,65 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.488078e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.801083e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.801083e+05 ) sec^-1 
+MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.447720 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,546,937,322 cycles # 2.670 GHz + 12,109,881,739 instructions # 1.85 insn per cycle + 2.453162643 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2351) (512y: 227) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.983756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.117708e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.117708e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.627336 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,377,996,877 cycles # 1.756 GHz + 8,984,744,450 instructions # 1.41 insn per cycle + 3.632964633 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1892) (512y: 178) (512z: 2083) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index c083e9c540..f2a95b68c4 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_16:47:20 +DATE: 2024-05-16_15:19:32 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow 
summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.249237e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.105605e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.108817e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.397188 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.205899e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.229515e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.233614e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.467479 sec INFO: No Floating Point Exceptions have been reported - 1,004,692,712 cycles:u # 2.519 GHz (72.93%) - 2,262,405 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.51%) - 5,225,315 stalled-cycles-backend:u # 0.52% backend cycles idle (75.85%) - 1,406,860,225 instructions:u # 1.40 insn per cycle - # 0.00 stalled cycles per insn (75.73%) - 0.445011634 seconds time elapsed + 1,929,394,895 cycles # 2.809 GHz + 2,774,653,842 instructions # 1.44 insn per cycle + 0.745241861 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.670202e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.869175e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.870322e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.404451 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.854750e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.994181e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.003911e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.485365 sec INFO: No Floating Point Exceptions have been reported - 1,119,277,176 cycles:u # 2.635 GHz (74.63%) - 2,281,293 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.41%) - 5,778,701 stalled-cycles-backend:u # 0.52% backend cycles idle (75.54%) - 1,501,239,436 instructions:u # 1.34 insn per cycle - # 0.00 stalled cycles per insn (75.54%) - 0.448625365 seconds time elapsed + 1,990,830,698 cycles # 2.816 GHz + 2,942,277,354 instructions # 1.48 insn per cycle + 0.765598417 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176587E-006 -Relative difference = 3.3392753387325367e-07 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.033110e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.036614e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.036614e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.136579 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.339413e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.342602e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.342602e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.163415 sec INFO: No Floating Point Exceptions have been reported - 469,654,777 cycles:u # 3.361 GHz (71.58%) - 207,304 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.39%) - 65,129,868 stalled-cycles-backend:u # 13.87% backend cycles idle (77.07%) - 1,392,067,783 instructions:u # 2.96 insn per cycle - # 0.05 stalled cycles per insn (77.12%) - 0.142722037 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1926) (avx2: 0) (512y: 0) (512z: 0) + 474,956,853 cycles # 2.847 GHz + 1,396,923,375 instructions # 2.94 insn per cycle + 0.167372542 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3991) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860167168E-006 -Relative difference = 3.3392764976441195e-07 +Avg ME (F77/C++) = 8.1274562860167185E-006 +Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.240664e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.257925e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.257925e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.062711 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.350685e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.362490e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.362490e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.088282 sec INFO: No Floating Point Exceptions have been reported - 223,798,642 cycles:u # 3.393 GHz (73.30%) - 75,139 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.96%) - 21,166,758 stalled-cycles-backend:u # 9.46% backend cycles idle (75.76%) - 660,287,200 instructions:u # 2.95 insn per cycle - # 0.03 stalled cycles per insn (75.76%) - 0.069260875 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9270) (avx2: 0) (512y: 0) (512z: 0) + 246,129,842 cycles # 2.680 GHz + 699,160,574 instructions # 2.84 insn per cycle + 0.092454839 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9501) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.075126e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.084453e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.084453e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.031311 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.421076e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.426847e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.426847e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.042262 sec INFO: No Floating Point Exceptions have been reported - 103,447,525 cycles:u # 2.943 GHz (73.59%) - 79,219 stalled-cycles-frontend:u # 0.08% frontend cycles idle (77.29%) - 11,522,827 stalled-cycles-backend:u # 11.14% backend cycles idle (77.29%) - 230,968,164 instructions:u # 2.23 insn per cycle - # 0.05 stalled cycles per insn (77.28%) - 0.039268152 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8240) (512y: 0) (512z: 0) + 120,513,094 cycles # 2.641 GHz + 260,079,134 instructions # 2.16 insn per cycle + 0.046206481 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8227) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.614262e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.622122e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.622122e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037855 sec +INFO: No Floating Point Exceptions have 
been reported + 109,022,775 cycles # 2.645 GHz + 240,308,972 instructions # 2.20 insn per cycle + 0.041904895 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7348) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.170349e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175260e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.175260e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.050252 sec +INFO: No Floating Point Exceptions have been reported + 96,595,554 cycles # 1.802 GHz + 138,452,128 instructions # 1.43 insn per cycle + 0.054148545 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1692) (512y: 126) (512z: 6592) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index 6670b6deae..ca894b0a6d 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_16:47:27 +DATE: 2024-05-16_15:19:42 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.524444e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.326578e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.329845e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.375805 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.237277e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.263102e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.267367e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.467317 sec INFO: No Floating Point Exceptions have been reported - 988,836,868 cycles:u # 2.489 GHz (74.90%) - 2,168,415 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.86%) - 5,335,512 stalled-cycles-backend:u # 0.54% backend cycles idle (75.86%) - 1,446,352,813 instructions:u # 1.46 insn per cycle - # 0.00 stalled cycles per insn (74.48%) - 0.424375867 seconds time elapsed + 1,933,877,717 cycles # 2.813 GHz + 2,829,779,417 instructions # 1.46 insn per cycle + 0.746133258 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.657513e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.864111e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.864991e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.402087 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.945887e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.087010e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.096853e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.483889 sec INFO: No Floating Point Exceptions have been reported - 1,123,109,105 cycles:u # 2.644 GHz (73.81%) - 2,150,436 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.58%) - 5,271,012 stalled-cycles-backend:u # 0.47% backend cycles idle (75.42%) - 1,495,903,512 instructions:u # 1.33 insn per cycle - # 0.00 stalled cycles per insn (75.54%) - 0.448148581 seconds time elapsed + 2,005,783,112 cycles # 2.816 GHz + 2,927,359,248 instructions # 1.46 insn per cycle + 0.768925329 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176587E-006 -Relative difference = 3.3392753387325367e-07 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.078771e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.082315e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.082315e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.134582 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.344408e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.347652e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.347652e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.162339 sec INFO: No Floating Point Exceptions have been reported - 471,983,208 cycles:u # 3.420 GHz (71.54%) - 148,000 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.37%) - 64,228,017 stalled-cycles-backend:u # 13.61% backend cycles idle (76.82%) - 1,385,303,937 instructions:u # 2.94 insn per cycle - # 0.05 stalled cycles per insn (76.82%) - 0.141959219 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1902) (avx2: 0) (512y: 0) (512z: 0) + 471,806,818 cycles # 2.848 GHz + 1,391,948,601 instructions # 2.95 insn per cycle + 0.166295977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3869) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860167168E-006 -Relative difference = 3.3392764976441195e-07 +Avg ME (F77/C++) = 8.1274562860167185E-006 +Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.158759e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.175984e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.175984e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.062631 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.367799e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.379601e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.379601e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.087176 sec INFO: No Floating Point Exceptions have been reported - 200,577,982 cycles:u # 3.034 GHz (76.01%) - 82,947 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.82%) - 27,068,799 stalled-cycles-backend:u # 13.50% backend cycles idle (75.82%) - 657,741,134 instructions:u # 3.28 insn per cycle - # 0.04 stalled cycles per insn (75.82%) - 0.069339406 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9325) (avx2: 0) (512y: 0) (512z: 0) + 243,999,829 cycles # 2.694 GHz + 695,186,413 instructions # 2.85 insn per cycle + 0.091139423 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9537) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.082004e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.090663e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.090663e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.030350 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.395387e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.400899e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.400899e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.042187 sec INFO: No Floating Point Exceptions have been reported - 105,410,004 cycles:u # 3.128 GHz (73.38%) - 64,571 stalled-cycles-frontend:u # 0.06% frontend cycles idle (76.34%) - 12,580,128 stalled-cycles-backend:u # 11.93% backend cycles idle (76.29%) - 227,937,467 instructions:u # 2.16 insn per cycle - # 0.06 stalled cycles per insn (76.30%) - 0.037102587 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8196) (512y: 0) (512z: 0) + 119,801,052 cycles # 2.624 GHz + 255,741,591 instructions # 2.13 insn per cycle + 0.046174431 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8181) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.613988e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.621406e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621406e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037041 sec +INFO: No Floating Point Exceptions have 
been reported + 106,534,081 cycles # 2.639 GHz + 235,917,118 instructions # 2.21 insn per cycle + 0.041041363 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7301) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.167962e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.172897e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.172897e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.049548 sec +INFO: No Floating Point Exceptions have been reported + 94,554,513 cycles # 1.786 GHz + 133,899,064 instructions # 1.42 insn per cycle + 0.053428613 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1641) (512y: 126) (512z: 6597) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index a073daefbe..f86e27869e 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_16:47:34 +DATE: 2024-05-16_15:19:53 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.580358e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.790988e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.792117e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.100300e-04 +- 2.256635e-04 ) GeV^-4 -TOTAL : 0.331774 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.541598e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.553658e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.556693e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 +TOTAL : 0.467629 sec INFO: No Floating Point Exceptions have been reported - 845,933,702 cycles:u # 2.396 GHz (74.12%) - 2,215,776 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.36%) - 5,136,072 stalled-cycles-backend:u # 0.61% backend cycles idle (74.36%) - 1,366,289,325 instructions:u # 1.62 insn per cycle - # 0.00 stalled cycles per insn (75.11%) - 0.377329106 seconds time elapsed + 1,964,166,954 cycles # 2.815 GHz + 2,823,406,286 instructions # 1.44 insn per cycle + 0.754117473 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.082122e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.036129e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.039928e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.017784e-02 +- 5.681015e-02 ) GeV^-4 -TOTAL : 0.343540 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.614317e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.731134e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.742615e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.020493e-03 +- 4.025604e-03 ) GeV^-4 +TOTAL : 0.468434 sec INFO: No Floating Point Exceptions have been reported - 891,904,831 cycles:u # 2.445 GHz (74.08%) - 2,269,831 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.38%) - 5,457,551 stalled-cycles-backend:u # 0.61% backend cycles idle (75.71%) - 1,334,940,313 instructions:u # 1.50 insn per cycle - # 0.00 stalled cycles per insn (75.81%) - 0.388639407 seconds time elapsed + 1,946,164,211 cycles # 2.817 GHz + 2,847,399,547 instructions # 1.46 insn per cycle + 0.748191861 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 8.127320e-06 -Avg ME (F77/GPU) = 8.1275379236374627E-006 -Relative difference = 2.681371441780168e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272870954487585E-006 +Relative difference = 4.564329725014175e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.470121e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.474326e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.474326e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.123780 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.448019e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.451516e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.451516e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.158474 sec INFO: No Floating Point Exceptions have been reported - 413,295,293 cycles:u # 3.248 GHz (74.93%) - 89,291 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.86%) - 45,921,697 stalled-cycles-backend:u # 11.11% backend cycles idle (74.86%) - 1,294,421,183 instructions:u # 3.13 insn per cycle - # 0.04 stalled cycles per insn (74.86%) - 0.130474069 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1611) (avx2: 0) (512y: 0) (512z: 0) + 461,638,972 cycles # 2.852 GHz + 1,393,493,000 instructions # 3.02 insn per cycle + 0.162490485 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3070) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127810e-06 -Avg ME (F77/C++) = 8.1278100323291073E-006 -Relative difference = 3.977591502689147e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127811e-06 +Avg ME (F77/C++) = 8.1278105211728276E-006 +Relative difference = 5.891219330978222e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.733679e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.739879e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.739879e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.036021 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.201120e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.205395e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.205395e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.048902 sec INFO: No Floating Point Exceptions have been reported - 113,806,570 cycles:u # 2.899 GHz (75.21%) - 81,457 stalled-cycles-frontend:u # 0.07% frontend cycles idle (79.65%) - 17,796,008 stalled-cycles-backend:u # 15.64% backend cycles idle (79.65%) - 342,773,119 instructions:u # 3.01 insn per cycle - # 0.05 stalled cycles per insn (79.65%) - 0.042511695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9799) (avx2: 0) (512y: 0) (512z: 0) + 138,099,810 cycles # 2.644 GHz + 375,723,801 instructions # 2.72 insn per cycle + 0.052805368 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10134) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127807e-06 -Avg ME (F77/C++) = 8.1278071680283782E-006 -Relative difference = 2.0673273707686565e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127809e-06 +Avg ME (F77/C++) = 8.1278090510674588E-006 +Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] 
(23) = ( 4.069327e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.103583e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.103583e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.018562 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.699468e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.721720e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.721720e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.024500 sec INFO: No Floating Point Exceptions have been reported - 59,391,676 cycles:u # 2.673 GHz (73.35%) - 65,296 stalled-cycles-frontend:u # 0.11% frontend cycles idle (65.42%) - 4,060,421 stalled-cycles-backend:u # 6.84% backend cycles idle (64.09%) - 105,349,453 instructions:u # 1.77 insn per cycle - # 0.04 stalled cycles per insn (64.02%) - 0.025321924 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8980) (512y: 0) (512z: 0) + 72,431,086 cycles # 2.595 GHz + 146,734,646 instructions # 2.03 insn per cycle + 0.028413255 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8933) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127535e-06 -Avg ME (F77/C++) = 8.1275352476332691E-006 -Relative difference = 3.04684346075092e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.950281e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.979563e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.979563e+04 ) sec^-1 
+MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.023199 sec +INFO: No Floating Point Exceptions have been reported + 67,511,576 cycles # 2.517 GHz + 136,466,222 instructions # 2.02 insn per cycle + 0.027372188 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8164) (512y: 28) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] 
(23) = ( 2.260359e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.280493e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.280493e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 +TOTAL : 0.028479 sec +INFO: No Floating Point Exceptions have been reported + 59,124,236 cycles # 1.860 GHz + 85,286,285 instructions # 1.44 insn per cycle + 0.032355670 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2572) (512y: 32) (512z: 6935) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275369863475849E-006 +Relative difference = 1.6797726498700304e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 21217c8e82..2af7dd76f9 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx 
+BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_16:47:40 +DATE: 2024-05-16_15:20:03 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow 
summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.485272e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.698246e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.699297e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.100300e-04 +- 2.256635e-04 ) GeV^-4 -TOTAL : 0.332618 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.561126e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.572400e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.575387e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188142e-04 +- 6.565203e-04 ) GeV^-4 +TOTAL : 0.469592 sec INFO: No Floating Point Exceptions have been reported - 852,682,179 cycles:u # 2.412 GHz (74.20%) - 2,114,869 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.32%) - 5,227,861 stalled-cycles-backend:u # 0.61% backend cycles idle (76.09%) - 1,357,000,612 instructions:u # 1.59 insn per cycle - # 0.00 stalled cycles per insn (75.91%) - 0.380309350 seconds time elapsed + 1,933,901,131 cycles # 2.816 GHz + 2,803,636,036 instructions # 1.45 insn per cycle + 0.744726293 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.040010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.913259e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.916790e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.017784e-02 +- 5.681015e-02 ) GeV^-4 -TOTAL : 0.343910 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.901730e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.003706e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005157e+06 ) sec^-1 +MeanMatrixElemValue = ( 8.020495e-03 +- 4.025606e-03 ) GeV^-4 +TOTAL : 0.471774 sec INFO: No Floating Point Exceptions have been reported - 902,430,277 cycles:u # 2.474 GHz (73.57%) - 2,175,860 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.42%) - 5,109,947 stalled-cycles-backend:u # 0.57% backend cycles idle (75.75%) - 1,338,775,832 instructions:u # 1.48 insn per cycle - # 0.00 stalled cycles per insn (75.92%) - 0.387743482 seconds time elapsed + 1,934,886,385 cycles # 2.815 GHz + 2,830,776,229 instructions # 1.46 insn per cycle + 0.746474254 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 8.127320e-06 -Avg ME (F77/GPU) = 8.1275379236391975E-006 -Relative difference = 2.681371463124516e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272870252982758E-006 +Relative difference = 4.555698209723637e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.491799e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.496021e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.496021e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.122598 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.452227e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.455705e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.455705e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.157329 sec INFO: No Floating Point Exceptions have been reported - 412,510,663 cycles:u # 3.266 GHz (74.78%) - 88,243 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.68%) - 48,686,355 stalled-cycles-backend:u # 11.80% backend cycles idle (74.68%) - 1,288,114,719 instructions:u # 3.12 insn per cycle - # 0.04 stalled cycles per insn (74.68%) - 0.129800445 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1591) (avx2: 0) (512y: 0) (512z: 0) + 458,573,657 cycles # 2.854 GHz + 1,388,574,447 instructions # 3.03 insn per cycle + 0.161242660 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127810e-06 -Avg ME (F77/C++) = 8.1278100323291073E-006 -Relative difference = 3.977591502689147e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127811e-06 +Avg ME (F77/C++) = 8.1278105211728276E-006 +Relative difference = 5.891219330978222e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.658569e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.664875e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.664875e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.037202 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.204538e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.208976e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.208976e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.047932 sec INFO: No Floating Point Exceptions have been reported - 123,231,500 cycles:u # 3.020 GHz (64.45%) - 32,397 stalled-cycles-frontend:u # 0.03% frontend cycles idle (73.20%) - 17,917,241 stalled-cycles-backend:u # 14.54% backend cycles idle (80.43%) - 338,960,280 instructions:u # 2.75 insn per cycle - # 0.05 stalled cycles per insn (80.43%) - 0.045406964 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9782) (avx2: 0) (512y: 0) (512z: 0) + 136,097,535 cycles # 2.652 GHz + 371,027,952 instructions # 2.73 insn per cycle + 0.051946079 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10117) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127807e-06 -Avg ME (F77/C++) = 8.1278071680283782E-006 -Relative difference = 2.0673273707686565e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127809e-06 +Avg ME (F77/C++) = 8.1278090510674588E-006 +Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] 
(23) = ( 4.068604e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.102973e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.102973e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.017922 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.559391e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.580217e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.580217e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.024960 sec INFO: No Floating Point Exceptions have been reported - 37,375,206 cycles:u # 1.762 GHz (62.38%) - 76,397 stalled-cycles-frontend:u # 0.20% frontend cycles idle (62.38%) - 7,317,942 stalled-cycles-backend:u # 19.58% backend cycles idle (62.25%) - 163,822,146 instructions:u # 4.38 insn per cycle - # 0.04 stalled cycles per insn (75.96%) - 0.024185473 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8934) (512y: 0) (512z: 0) + 71,167,021 cycles # 2.517 GHz + 142,031,155 instructions # 2.00 insn per cycle + 0.028974311 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8887) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127535e-06 -Avg ME (F77/C++) = 8.1275352476332691E-006 -Relative difference = 3.04684346075092e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.102195e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.131341e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.131341e+04 ) sec^-1 
+MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.021142 sec +INFO: No Floating Point Exceptions have been reported + 63,906,261 cycles # 2.611 GHz + 131,729,034 instructions # 2.06 insn per cycle + 0.025029577 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8117) (512y: 28) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] 
(23) = ( 2.321655e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.342179e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.342179e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 +TOTAL : 0.027241 sec +INFO: No Floating Point Exceptions have been reported + 57,621,926 cycles # 1.879 GHz + 80,488,160 instructions # 1.40 insn per cycle + 0.031258526 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2521) (512y: 32) (512z: 6939) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275369863475849E-006 +Relative difference = 1.6797726498700304e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 67d4ea5be4..16ac12981a 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx 
+BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_16:47:47 +DATE: 2024-05-16_15:20:13 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow 
summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.973967e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.839341e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.842392e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.389170 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.172533e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.195464e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.199217e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.467059 sec INFO: No Floating Point Exceptions have been reported - 1,049,671,631 cycles:u # 2.595 GHz (73.34%) - 2,145,491 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.13%) - 5,338,589 stalled-cycles-backend:u # 0.51% backend cycles idle (74.49%) - 1,484,554,929 instructions:u # 1.41 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 0.435834746 seconds time elapsed + 1,929,783,722 cycles # 2.812 GHz + 2,830,067,082 instructions # 1.47 insn per cycle + 0.744348567 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.645400e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.858752e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.859708e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.407362 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.817494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.954472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.963776e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.485210 sec INFO: No Floating Point Exceptions have been reported - 1,054,455,188 cycles:u # 2.486 GHz (75.29%) - 2,146,967 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.61%) - 5,177,929 stalled-cycles-backend:u # 0.49% backend cycles idle (75.98%) - 1,493,526,168 instructions:u # 1.42 insn per cycle - # 0.00 stalled cycles per insn (75.76%) - 0.451964984 seconds time elapsed + 1,989,265,248 cycles # 2.816 GHz + 2,972,405,087 instructions # 1.49 insn per cycle + 0.764721680 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405183E-006 -Relative difference = 3.336909458255062e-07 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.970330e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.973645e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.973645e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.138543 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.312127e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.315249e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.315249e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.164695 sec INFO: No Floating Point Exceptions have been reported - 464,427,899 cycles:u # 3.271 GHz (73.13%) - 76,464 stalled-cycles-frontend:u # 0.02% frontend cycles idle (71.92%) - 62,995,016 stalled-cycles-backend:u # 13.56% backend cycles idle (72.25%) - 1,444,762,442 instructions:u # 3.11 insn per cycle - # 0.04 stalled cycles per insn (75.08%) - 0.145335380 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1922) (avx2: 0) (512y: 0) (512z: 0) + 479,517,658 cycles # 2.854 GHz + 1,405,303,424 instructions # 2.93 insn per cycle + 0.168655160 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3977) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 
1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.063181e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.080166e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.080166e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.064146 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.589174e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.601629e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.601629e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.085009 sec INFO: No Floating Point Exceptions have been reported - 198,464,404 cycles:u # 2.934 GHz (76.37%) - 106,440 stalled-cycles-frontend:u # 0.05% frontend cycles idle (76.37%) - 22,274,758 stalled-cycles-backend:u # 11.22% backend cycles idle (76.37%) - 658,392,705 instructions:u # 3.32 insn per cycle - # 0.03 stalled cycles per insn (76.37%) - 0.071173080 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9115) (avx2: 0) (512y: 0) (512z: 0) + 242,672,694 cycles # 2.748 GHz + 691,102,866 instructions # 2.85 insn per cycle + 0.088915527 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9324) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.100149e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.108981e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.108981e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.030748 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.402863e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.409241e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.409241e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.042757 sec INFO: No Floating Point Exceptions have been reported - 106,500,188 cycles:u # 3.126 GHz (72.86%) - 75,830 stalled-cycles-frontend:u # 0.07% frontend cycles idle (76.61%) - 11,125,913 stalled-cycles-backend:u # 10.45% backend cycles idle (76.56%) - 229,525,028 instructions:u # 2.16 insn per cycle - # 0.05 stalled cycles per insn (76.56%) - 0.037202441 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8195) (512y: 0) (512z: 0) + 119,836,607 cycles # 2.596 GHz + 257,882,084 instructions # 2.15 insn per cycle + 0.046733316 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.611690e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.620124e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.620124e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037856 sec +INFO: No Floating Point Exceptions have 
been reported + 108,462,768 cycles # 2.631 GHz + 238,127,423 instructions # 2.20 insn per cycle + 0.041890123 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7342) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.150674e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155466e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.155466e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.051477 sec +INFO: No Floating Point Exceptions have been reported + 99,538,839 cycles # 1.810 GHz + 139,339,349 instructions # 1.40 insn per cycle + 0.055665824 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1953) (512y: 122) (512z: 6323) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 7c7adbf4e6..96180e8a09 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,193 +1,233 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-05-16_16:47:53 +DATE: 2024-05-16_15:20:23 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.329669e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.100366e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.104247e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.414191 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.207087e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.230616e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.234507e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.468179 sec INFO: No Floating Point Exceptions have been reported - 966,999,156 cycles:u # 2.339 GHz (74.01%) - 2,212,971 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.43%) - 5,017,574 stalled-cycles-backend:u # 0.52% backend cycles idle (74.75%) - 1,467,686,990 instructions:u # 1.52 insn per cycle - # 0.00 stalled cycles per insn (75.63%) - 0.460852954 seconds time elapsed + 1,938,727,271 cycles # 2.813 GHz + 2,835,562,501 instructions # 1.46 insn per cycle + 0.747262841 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.666830e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.874292e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.875374e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.428185 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.924846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.065621e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.075056e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.482793 sec INFO: No Floating Point Exceptions have been reported - 1,078,414,057 cycles:u # 2.408 GHz (74.54%) - 2,243,483 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.90%) - 5,240,014 stalled-cycles-backend:u # 0.49% backend cycles idle (76.16%) - 1,523,411,562 instructions:u # 1.41 insn per cycle - # 0.00 stalled cycles per insn (75.37%) - 0.473470367 seconds time elapsed + 2,011,507,022 cycles # 2.818 GHz + 2,962,288,052 instructions # 1.47 insn per cycle + 0.770325801 seconds time elapsed ------------------------------------------------------------------------- -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405183E-006 -Relative difference = 3.336909458255062e-07 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.983715e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.987011e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.987011e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.138234 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.325014e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.328184e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328184e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.163699 sec INFO: No Floating Point Exceptions have been reported - 471,358,578 cycles:u # 3.326 GHz (72.95%) - 286,150 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.79%) - 53,227,929 stalled-cycles-backend:u # 11.29% backend cycles idle (77.43%) - 1,400,682,647 instructions:u # 2.97 insn per cycle - # 0.04 stalled cycles per insn (77.43%) - 0.144832085 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1898) (avx2: 0) (512y: 0) (512z: 0) + 475,740,171 cycles # 2.851 GHz + 1,400,755,519 instructions # 2.94 insn per cycle + 0.167716370 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3871) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 
1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 9.214837e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.233304e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.233304e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.062301 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.586616e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.599028e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.599028e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.084604 sec INFO: No Floating Point Exceptions have been reported - 222,145,421 cycles:u # 3.362 GHz (73.31%) - 164,932 stalled-cycles-frontend:u # 0.07% frontend cycles idle (76.00%) - 23,024,841 stalled-cycles-backend:u # 10.36% backend cycles idle (75.81%) - 645,618,042 instructions:u # 2.91 insn per cycle - # 0.04 stalled cycles per insn (75.81%) - 0.069645029 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9168) (avx2: 0) (512y: 0) (512z: 0) + 242,310,895 cycles # 2.753 GHz + 687,440,781 instructions # 2.84 insn per cycle + 0.088664129 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9365) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.127492e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.136675e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.136675e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.029807 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.421509e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.427219e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.427219e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.041396 sec INFO: No Floating Point Exceptions have been reported - 101,417,942 cycles:u # 3.046 GHz (74.72%) - 76,113 stalled-cycles-frontend:u # 0.08% frontend cycles idle (76.01%) - 10,755,338 stalled-cycles-backend:u # 10.60% backend cycles idle (76.01%) - 224,352,321 instructions:u # 2.21 insn per cycle - # 0.05 stalled cycles per insn (76.01%) - 0.036561379 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8148) (512y: 0) (512z: 0) + 117,633,598 cycles # 2.630 GHz + 253,582,281 instructions # 2.16 insn per cycle + 0.045344869 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8196) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.533249e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.540083e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.540083e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.038782 sec +INFO: No Floating Point Exceptions have 
been reported + 106,121,372 cycles # 2.518 GHz + 233,883,831 instructions # 2.20 insn per cycle + 0.042791740 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7292) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.148151e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.152898e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.152898e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.050273 sec +INFO: No Floating Point Exceptions have been reported + 95,562,086 cycles # 1.781 GHz + 134,760,547 instructions # 1.41 insn per cycle + 0.054201969 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1904) (512y: 122) (512z: 6323) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index b9a455b78d..15f8e8659d 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_16:46:36 +DATE: 2024-05-16_15:18:21 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.624010e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.367574e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.801579e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 -TOTAL : 0.355596 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.830621e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.798641e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.407520e+08 ) 
sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.518896 sec INFO: No Floating Point Exceptions have been reported - 795,883,375 cycles:u # 2.164 GHz (73.59%) - 2,199,508 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.16%) - 5,328,268 stalled-cycles-backend:u # 0.67% backend cycles idle (74.73%) - 1,302,983,695 instructions:u # 1.64 insn per cycle - # 0.00 stalled cycles per insn (76.23%) - 0.408744876 seconds time elapsed + 2,130,015,467 cycles # 2.824 GHz + 3,049,782,764 instructions # 1.43 insn per cycle + 0.811167083 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961386341935 -Relative difference = 2.0349321196791385e-07 +Avg ME (F77/GPU) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.171586e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.330161e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.330161e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 1.087037 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.652167e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.115593e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.115593e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.200987 sec INFO: No Floating Point Exceptions have been reported - 3,510,794,127 cycles:u # 3.189 GHz (74.59%) - 8,350,329 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.67%) - 9,535,648 stalled-cycles-backend:u # 0.27% backend cycles idle (75.04%) - 8,527,000,615 instructions:u # 2.43 insn per cycle - # 0.00 stalled cycles per insn (75.29%) - 1.105132836 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 422) (avx2: 0) (512y: 0) (512z: 0) + 3,451,141,340 cycles # 2.863 GHz + 8,714,346,508 instructions # 2.53 insn per cycle + 1.206502072 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 458) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.165406e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.818486e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.818486e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.676108 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.615216e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.136998e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.136998e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.764589 sec INFO: No Floating Point Exceptions have been reported - 2,061,850,364 cycles:u # 2.986 GHz (74.63%) - 9,451,247 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.56%) - 17,131,190 stalled-cycles-backend:u # 0.83% backend cycles idle (74.52%) - 5,339,599,196 instructions:u # 2.59 insn per cycle - # 0.00 stalled cycles per insn (74.92%) - 0.694041955 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) + 2,197,801,743 cycles # 2.856 GHz + 5,465,338,789 instructions # 2.49 insn per cycle + 0.770190206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1298) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.515946e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.232968e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.232968e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.490572 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.276018e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.408168e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.408168e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.576218 sec INFO: No Floating Point Exceptions have been reported - 1,400,660,046 cycles:u # 2.775 GHz (74.64%) - 8,301,019 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.70%) - 17,277,719 stalled-cycles-backend:u # 1.23% backend cycles idle (74.66%) - 3,131,495,578 instructions:u # 2.24 insn per cycle - # 0.01 stalled cycles per insn (74.64%) - 0.507920964 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1429) (512y: 0) (512z: 0) + 1,593,709,911 cycles # 2.743 GHz + 3,182,241,147 instructions # 2.00 insn per cycle + 0.581747530 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1459) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.349428e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.560869e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560869e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.561533 sec +INFO: No Floating Point Exceptions have been reported + 1,552,006,209 
cycles # 2.741 GHz + 3,083,871,547 instructions # 1.99 insn per cycle + 0.567100846 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1274) (512y: 95) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.103380e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.012957e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.012957e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 
1.231178e-04 ) GeV^0 +TOTAL : 0.614313 sec +INFO: No Floating Point Exceptions have been reported + 1,344,567,311 cycles # 2.171 GHz + 2,376,857,450 instructions # 1.77 insn per cycle + 0.619905839 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 584) (512y: 62) (512z: 953) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index 16cae251dc..6add239f16 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_16:46:43 +DATE: 2024-05-16_15:18:33 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.532002e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.960780e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.579462e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 -TOTAL : 0.341639 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.948407e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.328423e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.761410e+08 ) 
sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.519601 sec INFO: No Floating Point Exceptions have been reported - 744,748,649 cycles:u # 2.053 GHz (75.09%) - 2,024,882 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.84%) - 5,540,587 stalled-cycles-backend:u # 0.74% backend cycles idle (74.84%) - 1,318,423,649 instructions:u # 1.77 insn per cycle - # 0.00 stalled cycles per insn (76.44%) - 0.395137586 seconds time elapsed + 2,123,926,879 cycles # 2.815 GHz + 2,991,717,095 instructions # 1.41 insn per cycle + 0.811782941 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961386341935 -Relative difference = 2.0349321196791385e-07 +Avg ME (F77/GPU) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.177846e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.335135e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.335135e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 1.081545 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.686449e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.122021e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.122021e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.196252 sec INFO: No Floating Point Exceptions have been reported - 3,482,832,137 cycles:u # 3.178 GHz (74.70%) - 8,844,725 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.05%) - 15,499,947 stalled-cycles-backend:u # 0.45% backend cycles idle (75.18%) - 8,525,375,009 instructions:u # 2.45 insn per cycle - # 0.00 stalled cycles per insn (75.18%) - 1.099594807 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 356) (avx2: 0) (512y: 0) (512z: 0) + 3,435,810,217 cycles # 2.862 GHz + 8,629,255,980 instructions # 2.51 insn per cycle + 1.201785163 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 403) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.167114e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.819518e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.819518e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.674010 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.590372e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.090308e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.090308e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.773787 sec INFO: No Floating Point Exceptions have been reported - 2,073,283,760 cycles:u # 3.014 GHz (74.53%) - 9,474,615 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.38%) - 17,215,413 stalled-cycles-backend:u # 0.83% backend cycles idle (74.60%) - 5,261,755,497 instructions:u # 2.54 insn per cycle - # 0.00 stalled cycles per insn (75.22%) - 0.691195150 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1165) (avx2: 0) (512y: 0) (512z: 0) + 2,172,281,754 cycles # 2.790 GHz + 5,399,686,889 instructions # 2.49 insn per cycle + 0.779398624 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1258) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341951 Relative difference = 2.0349321157448718e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.522414e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.235097e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.235097e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.488293 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.283822e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.420214e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.420214e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.573489 sec INFO: No Floating Point Exceptions have been reported - 1,394,369,363 cycles:u # 2.775 GHz (74.72%) - 8,267,581 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.58%) - 16,512,177 stalled-cycles-backend:u # 1.18% backend cycles idle (74.59%) - 3,133,423,950 instructions:u # 2.25 insn per cycle - # 0.01 stalled cycles per insn (74.53%) - 0.505600439 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1373) (512y: 0) (512z: 0) + 1,585,769,603 cycles # 2.741 GHz + 3,149,146,191 instructions # 1.99 insn per cycle + 0.579182812 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1386) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328961386341946 Relative difference = 2.034932117056294e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.354137e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.604902e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.604902e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.559958 sec +INFO: No Floating Point Exceptions have been reported + 1,547,131,577 
cycles # 2.739 GHz + 3,062,437,995 instructions # 1.98 insn per cycle + 0.565482274 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1220) (512y: 95) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.108481e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.023241e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.023241e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 
1.231178e-04 ) GeV^0 +TOTAL : 0.612188 sec +INFO: No Floating Point Exceptions have been reported + 1,354,565,413 cycles # 2.195 GHz + 2,362,076,089 instructions # 1.74 insn per cycle + 0.617754113 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 557) (512y: 62) (512z: 944) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328961386341946 +Relative difference = 2.034932117056294e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 6b8b4b7ad6..35b822f8f6 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_16:46:51 +DATE: 2024-05-16_15:18:45 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.336288e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319598e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.720811e+09 ) sec^-1 -MeanMatrixElemValue = ( 4.242352e-01 +- 1.228752e-04 ) GeV^0 -TOTAL : 0.316203 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.370205e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.202282e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.219119e+09 ) 
sec^-1 +MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 +TOTAL : 0.481970 sec INFO: No Floating Point Exceptions have been reported - 745,531,179 cycles:u # 2.195 GHz (73.82%) - 2,167,811 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.03%) - 5,094,435 stalled-cycles-backend:u # 0.68% backend cycles idle (75.89%) - 1,190,444,919 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (76.57%) - 0.365656414 seconds time elapsed + 1,992,725,828 cycles # 2.818 GHz + 2,868,294,521 instructions # 1.44 insn per cycle + 0.764321619 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 72 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 4.232895e-01 -Avg ME (F77/GPU) = 0.42328966126660816 -Relative difference = 3.80984192091939e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.232893e-01 +Avg ME (F77/GPU) = 0.42328959883889183 +Relative difference = 7.059920764700599e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.304592e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.495176e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.495176e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 -TOTAL : 0.955824 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.685625e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.126627e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.126627e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.173857 sec INFO: No Floating Point Exceptions have been reported - 3,142,646,888 cycles:u # 3.256 GHz (75.02%) - 6,879,009 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.16%) - 7,453,801 stalled-cycles-backend:u # 0.24% backend cycles idle (75.14%) - 8,494,020,775 instructions:u # 2.70 insn per cycle - # 0.00 stalled cycles per insn (75.14%) - 0.969023811 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 516) (avx2: 0) (512y: 0) (512z: 0) + 3,371,653,633 cycles # 2.862 GHz + 8,663,374,999 instructions # 2.57 insn per cycle + 1.179087797 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 464) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.382427e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.124207e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.124207e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241992e-01 +- 1.228718e-04 ) GeV^0 -TOTAL : 0.463796 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.242831e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.476100e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.476100e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.559869 sec INFO: No Floating Point Exceptions have been reported - 1,400,689,385 cycles:u # 2.954 GHz (74.73%) - 7,018,882 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.70%) - 9,913,813 stalled-cycles-backend:u # 0.71% backend cycles idle (74.70%) - 3,706,220,717 instructions:u # 2.65 insn per cycle - # 0.00 stalled cycles per insn (74.75%) - 0.492260546 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1393) (avx2: 0) (512y: 0) (512z: 0) + 1,544,628,517 cycles # 2.742 GHz + 3,687,558,281 instructions # 2.39 insn per cycle + 0.565253973 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328960620216094 -Relative difference = 1.4652287586288606e-08 +Avg ME (F77/C++) = 0.42328960439772345 +Relative difference = 1.0389396439618597e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.689283e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.323261e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 8.323261e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 -TOTAL : 0.376265 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.072720e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.536969e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.536969e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.431765 sec INFO: No Floating Point Exceptions have been reported - 1,095,520,511 cycles:u # 2.839 GHz (75.12%) - 6,398,970 stalled-cycles-frontend:u # 0.58% frontend cycles idle (75.13%) - 22,193,733 stalled-cycles-backend:u # 2.03% backend cycles idle (75.13%) - 2,338,728,203 instructions:u # 2.13 insn per cycle - # 0.01 stalled cycles per insn (75.13%) - 0.389199301 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1807) (512y: 0) (512z: 0) + 1,203,780,059 cycles # 2.758 GHz + 2,425,738,448 instructions # 2.02 insn per cycle + 0.436956710 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1835) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328956839628518 -Relative difference = 7.466215756732981e-08 +Avg ME (F77/C++) = 0.42328956670826301 +Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.171115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.846212e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.846212e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 
0.420853 sec +INFO: No Floating Point Exceptions have been reported + 1,176,016,394 cycles # 2.764 GHz + 2,371,904,468 instructions # 2.02 insn per cycle + 0.426173333 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1716) (512y: 2) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232896e-01 +Avg ME (F77/C++) = 0.42328956670826301 +Relative difference = 7.865002347873079e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.877260e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.908000e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 4.908000e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.456855 sec +INFO: No Floating Point Exceptions have been reported + 1,057,659,631 cycles # 2.291 GHz + 2,045,594,279 instructions # 1.93 insn per cycle + 0.462305299 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1125) (512y: 5) (512z: 1216) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232896e-01 +Avg ME (F77/C++) = 0.42328957567224279 +Relative difference = 5.7473080363015266e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 4f2f50212a..7aff49b16c 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand 
+HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_16:46:58 +DATE: 2024-05-16_15:18:56 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.276474e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.320229e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.723645e+09 ) sec^-1 -MeanMatrixElemValue = ( 4.242352e-01 +- 1.228752e-04 ) GeV^0 -TOTAL : 0.321429 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.371360e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.210950e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.256375e+09 ) 
sec^-1 +MeanMatrixElemValue = ( 4.240325e-01 +- 1.231174e-04 ) GeV^0 +TOTAL : 0.480672 sec INFO: No Floating Point Exceptions have been reported - 765,125,856 cycles:u # 2.238 GHz (74.84%) - 2,127,653 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.03%) - 4,024,928 stalled-cycles-backend:u # 0.53% backend cycles idle (75.09%) - 1,274,853,767 instructions:u # 1.67 insn per cycle - # 0.00 stalled cycles per insn (77.21%) - 0.373458468 seconds time elapsed + 1,992,055,315 cycles # 2.814 GHz + 2,833,598,547 instructions # 1.42 insn per cycle + 0.764848194 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 71 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 4.232895e-01 -Avg ME (F77/GPU) = 0.42328966126660816 -Relative difference = 3.80984192091939e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.232893e-01 +Avg ME (F77/GPU) = 0.42328960436861962 +Relative difference = 7.190557844040413e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.328949e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.527922e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.527922e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228718e-04 ) GeV^0 -TOTAL : 0.941970 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.763702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.137508e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.137508e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.163446 sec INFO: No Floating Point Exceptions have been reported - 3,079,281,165 cycles:u # 3.236 GHz (74.61%) - 6,178,915 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.78%) - 9,066,673 stalled-cycles-backend:u # 0.29% backend cycles idle (74.81%) - 8,547,816,293 instructions:u # 2.78 insn per cycle - # 0.00 stalled cycles per insn (74.81%) - 0.955595287 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 379) (avx2: 0) (512y: 0) (512z: 0) + 3,338,476,373 cycles # 2.858 GHz + 8,537,550,948 instructions # 2.56 insn per cycle + 1.168736395 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 372) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 Avg ME (F77/C++) = 0.42328961598104797 Relative difference = 3.775440734888737e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.385350e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.149017e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.149017e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241992e-01 +- 1.228718e-04 ) GeV^0 -TOTAL : 0.463541 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.260122e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.497908e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497908e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.555036 sec INFO: No Floating Point Exceptions have been reported - 1,397,571,134 cycles:u # 2.954 GHz (74.65%) - 7,037,611 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.64%) - 11,680,404 stalled-cycles-backend:u # 0.84% backend cycles idle (74.70%) - 3,670,865,817 instructions:u # 2.63 insn per cycle - # 0.00 stalled cycles per insn (74.70%) - 0.476825915 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1365) (avx2: 0) (512y: 0) (512z: 0) + 1,536,047,057 cycles # 2.745 GHz + 3,655,155,421 instructions # 2.38 insn per cycle + 0.560267212 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1417) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328960620216094 -Relative difference = 1.4652287586288606e-08 +Avg ME (F77/C++) = 0.42328960439772345 +Relative difference = 1.0389396439618597e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.695540e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.339130e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 8.339130e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241992e-01 +- 1.228719e-04 ) GeV^0 -TOTAL : 0.376823 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.063874e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.501699e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.501699e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.432903 sec INFO: No Floating Point Exceptions have been reported - 1,079,351,160 cycles:u # 2.789 GHz (75.20%) - 6,706,529 stalled-cycles-frontend:u # 0.62% frontend cycles idle (75.20%) - 8,357,372 stalled-cycles-backend:u # 0.77% backend cycles idle (75.20%) - 2,355,555,888 instructions:u # 2.18 insn per cycle - # 0.00 stalled cycles per insn (75.19%) - 0.390366323 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1722) (512y: 0) (512z: 0) + 1,210,141,290 cycles # 2.765 GHz + 2,409,755,736 instructions # 1.99 insn per cycle + 0.438252635 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1739) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232896e-01 -Avg ME (F77/C++) = 0.42328956839628518 -Relative difference = 7.466215756732981e-08 +Avg ME (F77/C++) = 0.42328956670826301 +Relative difference = 7.865002347873079e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.166764e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.861571e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.861571e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 
0.420903 sec +INFO: No Floating Point Exceptions have been reported + 1,178,969,939 cycles # 2.770 GHz + 2,360,225,770 instructions # 2.00 insn per cycle + 0.426183474 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1639) (512y: 2) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232896e-01 +Avg ME (F77/C++) = 0.42328956670826301 +Relative difference = 7.865002347873079e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.911284e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.009343e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.009343e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240336e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.450059 sec +INFO: No Floating Point Exceptions have been reported + 1,050,992,336 cycles # 2.312 GHz + 2,030,439,704 instructions # 1.93 insn per cycle + 0.455402836 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1038) (512y: 5) (512z: 1206) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232896e-01 +Avg ME (F77/C++) = 0.42328957567224279 +Relative difference = 5.7473080363015266e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index c7066c8a74..abe970d6c3 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand 
+HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_16:47:05 +DATE: 2024-05-16_15:19:08 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.306885e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.364036e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.798270e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 -TOTAL : 0.363559 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.820532e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.774843e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.362520e+08 ) 
sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.522135 sec INFO: No Floating Point Exceptions have been reported - 762,883,474 cycles:u # 2.061 GHz (73.90%) - 2,138,647 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.39%) - 5,273,566 stalled-cycles-backend:u # 0.69% backend cycles idle (76.10%) - 1,266,871,946 instructions:u # 1.66 insn per cycle - # 0.00 stalled cycles per insn (76.25%) - 0.418126761 seconds time elapsed + 2,125,526,304 cycles # 2.816 GHz + 3,031,609,259 instructions # 1.43 insn per cycle + 0.813775431 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961420809230 -Relative difference = 2.026789399531628e-07 +Avg ME (F77/GPU) = 0.42328961420809225 +Relative difference = 2.02678940084305e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.165373e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.317942e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.317942e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 1.093202 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.477506e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.093135e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.093135e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.221347 sec INFO: No Floating Point Exceptions have been reported - 3,517,886,438 cycles:u # 3.171 GHz (74.79%) - 8,156,251 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.78%) - 14,175,606 stalled-cycles-backend:u # 0.40% backend cycles idle (74.77%) - 8,647,070,613 instructions:u # 2.46 insn per cycle - # 0.00 stalled cycles per insn (74.94%) - 1.113015607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 427) (avx2: 0) (512y: 0) (512z: 0) + 3,505,104,547 cycles # 2.859 GHz + 8,781,502,817 instructions # 2.51 insn per cycle + 1.226777715 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.201883e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.882372e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.882372e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.669302 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.650256e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.201424e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.201424e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.750812 sec INFO: No Floating Point Exceptions have been reported - 2,048,006,224 cycles:u # 2.992 GHz (74.29%) - 8,758,196 stalled-cycles-frontend:u # 0.43% frontend cycles idle (74.77%) - 12,292,199 stalled-cycles-backend:u # 0.60% backend cycles idle (75.30%) - 5,263,631,425 instructions:u # 2.57 insn per cycle - # 0.00 stalled cycles per insn (75.46%) - 0.687604394 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1260) (avx2: 0) (512y: 0) (512z: 0) + 2,158,593,065 cycles # 2.858 GHz + 5,461,970,761 instructions # 2.53 insn per cycle + 0.756427517 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1315) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.587206e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.385221e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.385221e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.484670 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.173052e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.222124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.222124e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.600946 sec INFO: No Floating Point Exceptions have been reported - 1,381,448,051 cycles:u # 2.761 GHz (74.62%) - 8,400,102 stalled-cycles-frontend:u # 0.61% frontend cycles idle (74.48%) - 13,152,020 stalled-cycles-backend:u # 0.95% backend cycles idle (74.42%) - 3,058,571,921 instructions:u # 2.21 insn per cycle - # 0.00 stalled cycles per insn (74.43%) - 0.503638227 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) + 1,584,857,703 cycles # 2.630 GHz + 3,130,453,718 instructions # 1.98 insn per cycle + 0.606559761 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1508) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328962559055894 -Relative difference = 1.757884518645067e-07 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.444228e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.788523e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.788523e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 
0.544040 sec +INFO: No Floating Point Exceptions have been reported + 1,507,653,377 cycles # 2.746 GHz + 2,979,978,086 instructions # 1.98 insn per cycle + 0.549733637 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1266) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.159766e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.131056e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.131056e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.601738 sec +INFO: No Floating Point Exceptions have been reported + 1,324,343,740 cycles # 2.183 GHz + 2,317,585,809 instructions # 1.75 insn per cycle + 0.607328338 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 708) (512y: 64) (512z: 1000) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index a386adac7f..91c7a883f0 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand 
+HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-05-16_16:47:12 +DATE: 2024-05-16_15:19:20 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.003235e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.979938e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.601795e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.239474e-01 +- 1.231432e-04 ) GeV^0 -TOTAL : 0.363255 sec +EvtsPerSec[Rmb+ME] (23) = ( 6.922874e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.310136e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.745093e+08 ) 
sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.518290 sec INFO: No Floating Point Exceptions have been reported - 792,900,612 cycles:u # 2.144 GHz (75.31%) - 2,166,032 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.87%) - 5,227,505 stalled-cycles-backend:u # 0.66% backend cycles idle (75.33%) - 1,376,791,949 instructions:u # 1.74 insn per cycle - # 0.00 stalled cycles per insn (74.08%) - 0.420343005 seconds time elapsed + 2,124,893,311 cycles # 2.820 GHz + 3,045,592,907 instructions # 1.43 insn per cycle + 0.810370808 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.232897e-01 -Avg ME (F77/GPU) = 0.42328961420809230 -Relative difference = 2.026789399531628e-07 +Avg ME (F77/GPU) = 0.42328961420809225 +Relative difference = 2.02678940084305e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.175622e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.330964e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.330964e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 1.084986 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.542081e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.100861e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100861e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 1.212162 sec INFO: No Floating Point Exceptions have been reported - 3,500,822,655 cycles:u # 3.182 GHz (74.57%) - 8,555,826 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.78%) - 14,017,103 stalled-cycles-backend:u # 0.40% backend cycles idle (75.13%) - 8,519,832,574 instructions:u # 2.43 insn per cycle - # 0.00 stalled cycles per insn (75.28%) - 1.104173538 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 358) (avx2: 0) (512y: 0) (512z: 0) + 3,479,876,909 cycles # 2.860 GHz + 8,693,142,752 instructions # 2.50 insn per cycle + 1.217788949 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.212101e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.890856e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.890856e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.666977 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.583309e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.076893e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.076893e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.776846 sec INFO: No Floating Point Exceptions have been reported - 2,034,100,023 cycles:u # 2.982 GHz (74.31%) - 8,738,451 stalled-cycles-frontend:u # 0.43% frontend cycles idle (74.88%) - 17,580,783 stalled-cycles-backend:u # 0.86% backend cycles idle (75.37%) - 5,244,786,981 instructions:u # 2.58 insn per cycle - # 0.00 stalled cycles per insn (75.37%) - 0.685345328 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1221) (avx2: 0) (512y: 0) (512z: 0) + 2,167,338,088 cycles # 2.773 GHz + 5,396,551,029 instructions # 2.49 insn per cycle + 0.782321373 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1286) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 Avg ME (F77/C++) = 0.42328962565639783 Relative difference = 1.7563291089600324e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 3.561486e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.346476e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.346476e+06 ) sec^-1 -MeanMatrixElemValue = ( 
4.241994e-01 +- 1.228720e-04 ) GeV^0 -TOTAL : 0.488701 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.326845e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.550286e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.550286e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.565802 sec INFO: No Floating Point Exceptions have been reported - 1,403,826,615 cycles:u # 2.784 GHz (73.88%) - 8,976,844 stalled-cycles-frontend:u # 0.64% frontend cycles idle (73.88%) - 16,525,914 stalled-cycles-backend:u # 1.18% backend cycles idle (74.62%) - 3,022,589,948 instructions:u # 2.15 insn per cycle - # 0.01 stalled cycles per insn (75.27%) - 0.507523094 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1430) (512y: 0) (512z: 0) + 1,565,712,129 cycles # 2.743 GHz + 3,096,211,416 instructions # 1.98 insn per cycle + 0.571442008 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1403) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.232897e-01 -Avg ME (F77/C++) = 0.42328962559055894 -Relative difference = 1.757884518645067e-07 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.453432e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.812851e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.812851e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 
0.541762 sec +INFO: No Floating Point Exceptions have been reported + 1,501,240,710 cycles # 2.746 GHz + 2,962,583,104 instructions # 1.97 insn per cycle + 0.547343450 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1207) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.179755e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.168512e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.168512e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.240339e-01 +- 1.231178e-04 ) GeV^0 +TOTAL : 0.595795 sec +INFO: No Floating Point Exceptions have been reported + 1,328,066,698 cycles # 2.210 GHz + 2,301,968,914 instructions # 1.73 insn per cycle + 0.601517736 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 669) (512y: 64) (512z: 987) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.232897e-01 +Avg ME (F77/C++) = 0.42328962604218012 +Relative difference = 1.747215201983364e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 6d05f96261..685cbca5b9 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:45:15 +DATE: 2024-05-16_15:16:05 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.879928e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.958359e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.012731e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 -TOTAL : 0.382968 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.742150e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168430e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277843e+08 ) sec^-1 
+MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.532609 sec INFO: No Floating Point Exceptions have been reported - 852,165,853 cycles:u # 2.135 GHz (75.88%) - 2,112,629 stalled-cycles-frontend:u # 0.25% frontend cycles idle (76.16%) - 4,490,362 stalled-cycles-backend:u # 0.53% backend cycles idle (75.01%) - 1,418,720,820 instructions:u # 1.66 insn per cycle - # 0.00 stalled cycles per insn (74.20%) - 0.447542557 seconds time elapsed + 2,187,320,510 cycles # 2.847 GHz + 3,138,661,758 instructions # 1.43 insn per cycle + 0.825533767 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795799595181 -Relative difference = 1.298794346312088e-07 +Avg ME (F77/GPU) = 3.2340795799595186 +Relative difference = 1.2987943449389332e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.541705e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.606940e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.606940e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 4.306705 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.052254e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.112326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.112326e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.217611 sec INFO: No Floating Point Exceptions have been reported - 14,746,431,578 cycles:u # 3.414 GHz (75.00%) - 9,550,798 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) - 1,128,989,684 stalled-cycles-backend:u # 7.66% backend cycles idle (75.00%) - 38,772,621,071 instructions:u # 2.63 insn per cycle - # 0.03 stalled cycles per insn (74.91%) - 4.328692731 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 726) (avx2: 0) (512y: 0) (512z: 0) + 15,171,088,318 cycles # 2.905 GHz + 38,379,828,637 instructions # 2.53 insn per cycle + 5.223033411 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340795799593964 Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.347338e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.559630e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.559630e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) 
GeV^0 -TOTAL : 2.592991 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.483453e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.675957e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.675957e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.119586 sec INFO: No Floating Point Exceptions have been reported - 8,703,537,684 cycles:u # 3.341 GHz (74.89%) - 9,976,823 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.05%) - 853,729,768 stalled-cycles-backend:u # 9.81% backend cycles idle (75.13%) - 24,324,668,051 instructions:u # 2.79 insn per cycle - # 0.04 stalled cycles per insn (75.13%) - 2.608179496 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2067) (avx2: 0) (512y: 0) (512z: 0) + 9,050,575,942 cycles # 2.897 GHz + 24,585,418,505 instructions # 2.72 insn per cycle + 3.125051862 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799593964 -Relative difference = 1.2987947225564713e-07 +Avg ME (F77/C++) = 3.2340795799593955 +Relative difference = 1.2987947253027805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.569514e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.185817e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 8.185817e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 1.575070 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.531605e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.007383e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.007383e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.004395 sec INFO: No Floating Point Exceptions have been reported - 5,122,078,109 cycles:u # 3.228 GHz (74.83%) - 8,453,850 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.81%) - 68,227,274 stalled-cycles-backend:u # 1.33% backend cycles idle (74.81%) - 11,519,205,830 instructions:u # 2.25 insn per cycle - # 0.01 stalled cycles per insn (74.89%) - 1.590376059 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2399) (512y: 0) (512z: 0) + 5,470,487,475 cycles # 2.723 GHz + 11,258,117,341 instructions # 2.06 insn per cycle + 2.009874159 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2379) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799594542 -Relative difference = 1.2987945440463624e-07 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.034312e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.611178e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.611178e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 1.846817 sec +INFO: No 
Floating Point Exceptions have been reported + 4,937,000,755 cycles # 2.666 GHz + 10,562,656,233 instructions # 2.14 insn per cycle + 1.852346867 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.686069e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.892849e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.892849e+05 ) sec^-1 
+MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.955560 sec +INFO: No Floating Point Exceptions have been reported + 5,363,967,162 cycles # 1.812 GHz + 7,798,816,647 instructions # 1.45 insn per cycle + 2.961128813 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1445) (512y: 122) (512z: 1545) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 628ec0434d..e33bd01ef0 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:45:28 +DATE: 2024-05-16_15:16:29 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.841194e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.924897e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.978336e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 -TOTAL : 0.395777 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.734270e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.167895e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277771e+08 ) sec^-1 
+MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.531030 sec INFO: No Floating Point Exceptions have been reported - 859,912,650 cycles:u # 2.064 GHz (75.28%) - 2,183,975 stalled-cycles-frontend:u # 0.25% frontend cycles idle (76.01%) - 5,020,739 stalled-cycles-backend:u # 0.58% backend cycles idle (75.61%) - 1,382,370,967 instructions:u # 1.61 insn per cycle - # 0.00 stalled cycles per insn (74.66%) - 0.450533831 seconds time elapsed + 2,147,766,041 cycles # 2.808 GHz + 3,081,960,346 instructions # 1.43 insn per cycle + 0.823573588 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795799595181 -Relative difference = 1.298794346312088e-07 +Avg ME (F77/GPU) = 3.2340795799595186 +Relative difference = 1.2987943449389332e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.422423e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.483471e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.483471e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 4.510005 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.072347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.133952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.133952e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.167480 sec INFO: No Floating Point Exceptions have been reported - 15,386,177,336 cycles:u # 3.403 GHz (74.91%) - 9,207,550 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) - 18,497,567 stalled-cycles-backend:u # 0.12% backend cycles idle (75.02%) - 39,476,222,640 instructions:u # 2.57 insn per cycle - # 0.00 stalled cycles per insn (75.06%) - 4.525913205 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 597) (avx2: 0) (512y: 0) (512z: 0) + 15,011,121,904 cycles # 2.902 GHz + 40,101,107,795 instructions # 2.67 insn per cycle + 5.172969591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799593969 -Relative difference = 1.2987947211833165e-07 +Avg ME (F77/C++) = 3.2340795799593964 +Relative difference = 1.2987947225564713e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.382705e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.599240e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 4.599240e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 2.572662 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.643871e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.853935e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.853935e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.986462 sec INFO: No Floating Point Exceptions have been reported - 8,623,908,400 cycles:u # 3.337 GHz (74.97%) - 9,173,907 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) - 1,146,019,295 stalled-cycles-backend:u # 13.29% backend cycles idle (74.93%) - 23,610,189,818 instructions:u # 2.74 insn per cycle - # 0.05 stalled cycles per insn (74.94%) - 2.587492591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1947) (avx2: 0) (512y: 0) (512z: 0) + 8,687,902,361 cycles # 2.905 GHz + 23,671,582,038 instructions # 2.72 insn per cycle + 2.991891761 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2071) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799593964 -Relative difference = 1.2987947225564713e-07 +Avg ME (F77/C++) = 3.2340795799593955 +Relative difference = 1.2987947253027805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.943197e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.423167e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 7.423167e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 1.694513 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.688647e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.031946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.031946e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.347118 sec INFO: No Floating Point Exceptions have been reported - 5,665,749,753 cycles:u # 3.318 GHz (74.76%) - 8,841,103 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.75%) - 614,722,978 stalled-cycles-backend:u # 10.85% backend cycles idle (74.98%) - 13,076,993,512 instructions:u # 2.31 insn per cycle - # 0.05 stalled cycles per insn (75.17%) - 1.710627514 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2559) (512y: 0) (512z: 0) + 6,408,205,490 cycles # 2.726 GHz + 13,061,009,362 instructions # 2.04 insn per cycle + 2.352705794 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2545) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340795799594542 -Relative difference = 1.2987945440463624e-07 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.217515e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.639971e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.639971e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.116902 sec +INFO: No 
Floating Point Exceptions have been reported + 5,786,103,959 cycles # 2.728 GHz + 12,322,398,791 instructions # 2.13 insn per cycle + 2.122365893 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2092) (512y: 294) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.391355e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.565589e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.565589e+05 ) sec^-1 
+MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.201193 sec +INFO: No Floating Point Exceptions have been reported + 5,819,258,849 cycles # 1.816 GHz + 9,603,315,511 instructions # 1.65 insn per cycle + 3.206783116 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1509) (512y: 209) (512z: 1970) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340795799594546 +Relative difference = 1.2987945426732077e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 12dcdcf4bd..fa2404eda0 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:45:42 +DATE: 2024-05-16_15:16:53 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.269455e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.916466e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.075106e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.286435e+00 +- 3.209475e-03 ) GeV^0 -TOTAL : 0.322081 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.806467e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.679043e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.988694e+08 ) sec^-1 
+MeanMatrixElemValue = ( 3.294909e+00 +- 3.228140e-03 ) GeV^0 +TOTAL : 0.484472 sec INFO: No Floating Point Exceptions have been reported - 748,467,775 cycles:u # 2.155 GHz (75.25%) - 2,088,025 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.11%) - 4,948,506 stalled-cycles-backend:u # 0.66% backend cycles idle (75.08%) - 1,247,489,937 instructions:u # 1.67 insn per cycle - # 0.00 stalled cycles per insn (75.83%) - 0.385008032 seconds time elapsed + 2,024,107,607 cycles # 2.847 GHz + 2,925,717,340 instructions # 1.45 insn per cycle + 0.767822860 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 3.234089e+00 -Avg ME (F77/GPU) = 3.2340912986546755 -Relative difference = 7.107580142328097e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 3.234085e+00 +Avg ME (F77/GPU) = 3.2341253389604390 +Relative difference = 1.2473067479392238e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.964503e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.052988e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.052988e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 -TOTAL : 3.687002 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.190102e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.263149e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.263149e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 +TOTAL : 4.875075 sec INFO: No Floating Point Exceptions have been reported - 12,604,001,470 cycles:u # 3.411 GHz (74.94%) - 7,571,589 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.80%) - 798,909,126 stalled-cycles-backend:u # 6.34% backend cycles idle (74.85%) - 37,090,557,686 instructions:u # 2.94 insn per cycle - # 0.02 stalled cycles per insn (75.06%) - 3.699024002 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 607) (avx2: 0) (512y: 0) (512z: 0) + 14,157,231,167 cycles # 2.902 GHz + 38,349,372,496 instructions # 2.71 insn per cycle + 4.880360280 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 596) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 -Avg ME (F77/C++) = 3.2340939850546420 -Relative difference = 4.621188450363643e-09 +Avg ME (F77/C++) = 3.2340941932052374 +Relative difference = 5.974014286114415e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.216562e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.634342e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 6.634342e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287818e+00 +- 3.220111e-03 ) GeV^0 -TOTAL : 1.833635 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.893708e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.295163e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.295163e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 +TOTAL : 2.231375 sec INFO: No Floating Point Exceptions have been reported - 6,104,178,774 cycles:u # 3.313 GHz (74.71%) - 6,853,294 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.84%) - 2,101,538,414 stalled-cycles-backend:u # 34.43% backend cycles idle (74.84%) - 15,190,745,209 instructions:u # 2.49 insn per cycle - # 0.14 stalled cycles per insn (75.00%) - 1.858522382 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2462) (avx2: 0) (512y: 0) (512z: 0) + 6,474,839,888 cycles # 2.896 GHz + 15,821,273,128 instructions # 2.44 insn per cycle + 2.236825857 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2693) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234094e+00 -Avg ME (F77/C++) = 3.2340941177681088 -Relative difference = 3.641455970126884e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234093e+00 +Avg ME (F77/C++) = 3.2340934062376618 +Relative difference = 1.2561100182708985e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 1.215158e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.372742e+06 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.372742e+06 ) sec^-1 -MeanMatrixElemValue = ( 3.287796e+00 +- 3.219543e-03 ) GeV^0 -TOTAL : 1.009427 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.952001e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.027533e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.027533e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.258720 sec INFO: No Floating Point Exceptions have been reported - 3,315,467,966 cycles:u # 3.257 GHz (74.85%) - 7,383,317 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.85%) - 1,098,772,693 stalled-cycles-backend:u # 33.14% backend cycles idle (74.85%) - 7,732,872,347 instructions:u # 2.33 insn per cycle - # 0.14 stalled cycles per insn (74.88%) - 1.021036802 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3076) (512y: 0) (512z: 0) + 3,454,982,692 cycles # 2.735 GHz + 7,599,041,128 instructions # 2.20 insn per cycle + 1.263980564 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3054) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234093e+00 -Avg ME (F77/C++) = 3.2340926420874894 -Relative difference = 1.1066858953654753e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340919882990420 +Relative difference = 3.6180040581126224e-09 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.592851e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.112843e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.112843e+06 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 
+TOTAL : 1.180051 sec +INFO: No Floating Point Exceptions have been reported + 3,244,154,820 cycles # 2.739 GHz + 7,208,080,032 instructions # 2.22 insn per cycle + 1.185371954 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2854) (512y: 23) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340919882990420 +Relative difference = 3.6180040581126224e-09 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.861599e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.601056e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 7.601056e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.616099 sec +INFO: No Floating Point Exceptions have been reported + 3,061,871,050 cycles # 1.890 GHz + 5,840,738,200 instructions # 1.91 insn per cycle + 1.621459577 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2375) (512y: 24) (512z: 1889) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340921289287508 +Relative difference = 3.986551736519174e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index abda009b1a..17580b0829 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:45:54 +DATE: 2024-05-16_15:17:12 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.823498e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.096158e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.277282e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.286435e+00 +- 3.209475e-03 ) GeV^0 -TOTAL : 0.322919 sec +EvtsPerSec[Rmb+ME] (23) = ( 9.907160e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.728602e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.048441e+08 ) sec^-1 
+MeanMatrixElemValue = ( 3.294909e+00 +- 3.228140e-03 ) GeV^0 +TOTAL : 0.485743 sec INFO: No Floating Point Exceptions have been reported - 756,999,640 cycles:u # 2.187 GHz (74.49%) - 2,076,248 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.38%) - 5,285,191 stalled-cycles-backend:u # 0.70% backend cycles idle (76.34%) - 1,225,255,188 instructions:u # 1.62 insn per cycle - # 0.00 stalled cycles per insn (76.93%) - 0.375119226 seconds time elapsed + 2,023,423,533 cycles # 2.849 GHz + 2,905,255,031 instructions # 1.44 insn per cycle + 0.768600730 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 3.234089e+00 -Avg ME (F77/GPU) = 3.2340912986546755 -Relative difference = 7.107580142328097e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 3.234085e+00 +Avg ME (F77/GPU) = 3.2341253389604390 +Relative difference = 1.2473067479392238e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.954161e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.044245e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.044245e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287818e+00 +- 3.220110e-03 ) GeV^0 -TOTAL : 3.697492 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.168782e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.238544e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.238544e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294973e+00 +- 3.228584e-03 ) GeV^0 +TOTAL : 4.921731 sec INFO: No Floating Point Exceptions have been reported - 12,629,141,062 cycles:u # 3.408 GHz (74.96%) - 7,468,212 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.96%) - 9,965,210 stalled-cycles-backend:u # 0.08% backend cycles idle (74.96%) - 37,498,294,497 instructions:u # 2.97 insn per cycle - # 0.00 stalled cycles per insn (74.97%) - 3.710240742 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 500) (avx2: 0) (512y: 0) (512z: 0) + 14,314,886,956 cycles # 2.906 GHz + 39,834,092,366 instructions # 2.78 insn per cycle + 4.927032591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234094e+00 -Avg ME (F77/C++) = 3.2340939850546420 -Relative difference = 4.621188450363643e-09 +Avg ME (F77/C++) = 3.2340941675938666 +Relative difference = 5.182096339328524e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.011776e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.586789e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = 
( 7.586789e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287818e+00 +- 3.220111e-03 ) GeV^0 -TOTAL : 1.652272 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.713515e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.269520e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.269520e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294972e+00 +- 3.228583e-03 ) GeV^0 +TOTAL : 1.922771 sec INFO: No Floating Point Exceptions have been reported - 5,332,655,349 cycles:u # 3.211 GHz (75.04%) - 6,983,998 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.96%) - 1,050,227,851 stalled-cycles-backend:u # 19.69% backend cycles idle (74.96%) - 15,177,218,540 instructions:u # 2.85 insn per cycle - # 0.07 stalled cycles per insn (74.95%) - 1.664682527 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2329) (avx2: 0) (512y: 0) (512z: 0) + 5,581,497,918 cycles # 2.896 GHz + 15,286,085,618 instructions # 2.74 insn per cycle + 1.928038449 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234094e+00 -Avg ME (F77/C++) = 3.2340941177681088 -Relative difference = 3.641455970126884e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234093e+00 +Avg ME (F77/C++) = 3.2340934062376618 +Relative difference = 1.2561100182708985e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 8.898362e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.711000e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.711000e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287796e+00 +- 3.219543e-03 ) GeV^0 -TOTAL : 1.325334 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.348339e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.987488e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.987488e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.738529 sec INFO: No Floating Point Exceptions have been reported - 4,431,855,356 cycles:u # 3.323 GHz (74.81%) - 7,715,458 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.83%) - 1,686,083,115 stalled-cycles-backend:u # 38.04% backend cycles idle (74.83%) - 9,840,672,101 instructions:u # 2.22 insn per cycle - # 0.17 stalled cycles per insn (75.03%) - 1.339574896 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3749) (512y: 0) (512z: 0) + 4,748,584,350 cycles # 2.724 GHz + 9,734,762,909 instructions # 2.05 insn per cycle + 1.743720825 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3707) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 3.234093e+00 -Avg ME (F77/C++) = 3.2340926462784410 -Relative difference = 1.0937272340475427e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340919817797840 +Relative difference = 5.633796441974414e-09 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.524514e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.201131e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.201131e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 
+TOTAL : 1.693263 sec +INFO: No Floating Point Exceptions have been reported + 4,630,030,488 cycles # 2.727 GHz + 9,326,323,775 instructions # 2.01 insn per cycle + 1.698452247 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3495) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340919817797840 +Relative difference = 5.633796441974414e-09 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.566237e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.043529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 6.043529e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.295004e+00 +- 3.229072e-03 ) GeV^0 +TOTAL : 1.970968 sec +INFO: No Floating Point Exceptions have been reported + 3,659,262,236 cycles # 1.853 GHz + 7,035,706,161 instructions # 1.92 insn per cycle + 1.976219857 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2608) (512y: 12) (512z: 2220) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234092e+00 +Avg ME (F77/C++) = 3.2340921270661056 +Relative difference = 3.928957668408837e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 6b2529e9fc..b504154b8b 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:46:06 +DATE: 2024-05-16_15:17:33 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.837198e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.016409e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.072163e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 -TOTAL : 0.383476 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.734753e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166290e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275672e+08 ) sec^-1 
+MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.527580 sec INFO: No Floating Point Exceptions have been reported - 901,453,430 cycles:u # 2.224 GHz (74.25%) - 2,305,493 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.35%) - 5,372,210 stalled-cycles-backend:u # 0.60% backend cycles idle (74.94%) - 1,511,664,096 instructions:u # 1.68 insn per cycle - # 0.00 stalled cycles per insn (74.27%) - 0.438320391 seconds time elapsed + 2,184,025,819 cycles # 2.852 GHz + 3,120,664,968 instructions # 1.43 insn per cycle + 0.822365132 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795839181671 -Relative difference = 1.2865539287460837e-07 +Avg ME (F77/GPU) = 3.2340795839181666 +Relative difference = 1.2865539301192385e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.448788e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.510079e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.510079e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 4.465296 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.032702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.091464e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.091464e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.267767 sec INFO: No Floating Point Exceptions have been reported - 15,159,032,142 cycles:u # 3.385 GHz (75.02%) - 9,318,044 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) - 876,595,791 stalled-cycles-backend:u # 5.78% backend cycles idle (74.99%) - 39,258,289,836 instructions:u # 2.59 insn per cycle - # 0.02 stalled cycles per insn (75.00%) - 4.481904018 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 747) (avx2: 0) (512y: 0) (512z: 0) + 15,275,610,730 cycles # 2.898 GHz + 38,585,204,587 instructions # 2.53 insn per cycle + 5.273127531 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 677) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.439342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.668048e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.668048e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) 
GeV^0 -TOTAL : 2.557997 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.478780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.672331e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.672331e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.124457 sec INFO: No Floating Point Exceptions have been reported - 8,608,096,320 cycles:u # 3.348 GHz (74.84%) - 9,186,224 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.85%) - 1,586,729,408 stalled-cycles-backend:u # 18.43% backend cycles idle (75.01%) - 23,998,639,412 instructions:u # 2.79 insn per cycle - # 0.07 stalled cycles per insn (75.11%) - 2.574633110 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2102) (avx2: 0) (512y: 0) (512z: 0) + 8,951,368,692 cycles # 2.862 GHz + 24,230,346,765 instructions # 2.71 insn per cycle + 3.129932357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 7.870031e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.493127e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.493127e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 1.516500 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.646169e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.144963e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.144963e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 1.966588 sec INFO: No Floating Point Exceptions have been reported - 5,014,619,133 cycles:u # 3.279 GHz (74.89%) - 8,998,353 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.91%) - 531,408,134 stalled-cycles-backend:u # 10.60% backend cycles idle (74.91%) - 11,425,617,471 instructions:u # 2.28 insn per cycle - # 0.05 stalled cycles per insn (74.89%) - 1.532893502 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2470) (512y: 0) (512z: 0) + 5,394,193,630 cycles # 2.737 GHz + 11,282,079,100 instructions # 2.09 insn per cycle + 1.972075346 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2483) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340796844996675 -Relative difference = 9.755489429022839e-08 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.312770e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.933844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.933844e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 1.769300 sec +INFO: No 
Floating Point Exceptions have been reported + 4,855,634,573 cycles # 2.737 GHz + 10,529,908,188 instructions # 2.17 insn per cycle + 1.774939787 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2170) (512y: 148) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.779051e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.993953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.993953e+05 ) sec^-1 
+MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.883837 sec +INFO: No Floating Point Exceptions have been reported + 5,232,692,174 cycles # 1.812 GHz + 7,609,089,901 instructions # 1.45 insn per cycle + 2.889504238 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1611) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index 66608a5a11..62b069d661 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,175 +1,218 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS=-fopenmp FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make USEBUILDDIR=1 BACKEND=hip -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-05-16_16:46:19 +DATE: 2024-05-16_15:17:56 -On uan04 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 14.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.794547e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.927022e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.980434e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.295059e+00 +- 3.224567e-03 ) GeV^0 -TOTAL : 0.381240 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.743856e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168884e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279553e+08 ) sec^-1 
+MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 0.531580 sec INFO: No Floating Point Exceptions have been reported - 907,470,356 cycles:u # 2.238 GHz (74.21%) - 2,142,588 stalled-cycles-frontend:u # 0.24% frontend cycles idle (73.91%) - 5,471,565 stalled-cycles-backend:u # 0.60% backend cycles idle (74.41%) - 1,430,312,368 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (75.08%) - 0.433168931 seconds time elapsed + 2,155,818,187 cycles # 2.818 GHz + 3,085,690,683 instructions # 1.43 insn per cycle + 0.823819066 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 3.234080e+00 -Avg ME (F77/GPU) = 3.2340795839181671 -Relative difference = 1.2865539287460837e-07 +Avg ME (F77/GPU) = 3.2340795839181666 +Relative difference = 1.2865539301192385e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 2.407035e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.466607e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.466607e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 -TOTAL : 4.538384 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.002464e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.060011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.060011e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 5.344548 sec INFO: No Floating Point Exceptions have been reported - 15,625,722,006 cycles:u # 3.433 GHz (74.95%) - 9,832,695 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.04%) - 68,139,082 stalled-cycles-backend:u # 0.44% backend cycles idle (75.04%) - 40,075,526,509 instructions:u # 2.56 insn per cycle - # 0.00 stalled cycles per insn (75.04%) - 5.094128203 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 631) (avx2: 0) (512y: 0) (512z: 0) + 15,331,700,326 cycles # 2.866 GHz + 40,369,778,421 instructions # 2.63 insn per cycle + 5.350011304 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 4.525563e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.755420e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.755420e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) 
GeV^0 -TOTAL : 2.499111 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.555017e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.755921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.755921e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.059082 sec INFO: No Floating Point Exceptions have been reported - 8,491,243,161 cycles:u # 3.378 GHz (74.82%) - 10,450,945 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.87%) - 547,161,406 stalled-cycles-backend:u # 6.44% backend cycles idle (74.90%) - 23,487,414,757 instructions:u # 2.77 insn per cycle - # 0.02 stalled cycles per insn (75.06%) - 2.739310858 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1992) (avx2: 0) (512y: 0) (512z: 0) + 8,522,277,742 cycles # 2.782 GHz + 23,253,428,254 instructions # 2.73 insn per cycle + 3.064709896 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2090) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 Avg ME (F77/C++) = 3.2340796721168488 Relative difference = 1.0138374786539113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 12.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 128 -EvtsPerSec[Rmb+ME] (23) = ( 6.869246e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.340004e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.340004e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.287639e+00 +- 3.218203e-03 ) GeV^0 
-TOTAL : 1.709634 sec +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.699594e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.044812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.044812e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.340476 sec INFO: No Floating Point Exceptions have been reported - 5,683,454,886 cycles:u # 3.299 GHz (74.96%) - 10,084,749 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.93%) - 600,467,137 stalled-cycles-backend:u # 10.57% backend cycles idle (74.94%) - 13,037,365,443 instructions:u # 2.29 insn per cycle - # 0.05 stalled cycles per insn (74.94%) - 1.726283384 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2710) (512y: 0) (512z: 0) + 6,239,696,903 cycles # 2.661 GHz + 12,963,096,678 instructions # 2.08 insn per cycle + 2.346005075 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2668) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe [ PASSED ] 3 tests. 
------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 3.234080e+00 -Avg ME (F77/C++) = 3.2340796844996675 -Relative difference = 9.755489429022839e-08 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.032659e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.430530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.430530e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 2.191599 sec +INFO: No 
Floating Point Exceptions have been reported + 5,901,015,524 cycles # 2.687 GHz + 12,238,387,260 instructions # 2.07 insn per cycle + 2.197121947 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2208) (512y: 296) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.554826e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.745267e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.745267e+05 ) sec^-1 
+MeanMatrixElemValue = ( 3.294877e+00 +- 3.227953e-03 ) GeV^0 +TOTAL : 3.058656 sec +INFO: No Floating Point Exceptions have been reported + 5,596,491,041 cycles # 1.827 GHz + 8,743,545,379 instructions # 1.56 insn per cycle + 3.064278596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1908) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +[ PASSED ] 3 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 3.234080e+00 +Avg ME (F77/C++) = 3.2340796772295590 +Relative difference = 9.980286234148268e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED From e33c1f71e28870ffe322119ba6b04336f0e870ac Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 21 May 2024 23:19:48 +0200 Subject: [PATCH 47/48] [jtmk2] revert to previous CODEGEN logs - prepare to merge the latest jtmk GITMB=$(git merge-base --fork-point jtmk HEAD) echo $GITMB ef3e741cdd8f7082347bb1e740ae2958fff32806 git checkout $GITMB $(git ls-tree --name-only $GITMB */CODEGEN*txt) --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 14 ++--- .../CODEGEN_cudacpp_ee_mumu_log.txt | 8 +-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 16 ++--- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 8 +-- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 20 +++---- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 16 ++--- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 14 ++--- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 
18 +++--- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 16 ++--- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 +++---- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 16 ++--- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 +++---- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 12 ++-- .../CODEGEN_mad_heft_gg_bb_log.txt | 14 ++--- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 8 +-- .../CODEGEN_mad_pp_tt012j_log.txt | 60 +++++++++---------- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 18 +++--- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 12 ++-- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 18 +++--- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 6 +- .../CODEGEN_mad_susy_gg_tt_log.txt | 14 ++--- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 12 ++-- 22 files changed, 180 insertions(+), 180 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index f49e898735..4f57b9ac54 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005671262741088867  +DEBUG: model prefixing takes 0.0058667659759521484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,7 +177,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -199,14 +199,14 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.209 s +ALOHA: aloha creates 3 routines in 0.218 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.266 s +ALOHA: aloha creates 7 routines in 0.269 s FFV1 FFV1 FFV2 @@ -250,9 +250,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.937s -user 0m1.729s -sys 0m0.202s +real 0m1.957s +user 0m1.719s +sys 0m0.230s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 63dc9fe355..69249db5f4 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005807399749755859  +DEBUG: model prefixing takes 0.005859851837158203  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -184,7 +184,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.283 s +ALOHA: aloha creates 4 routines in 0.281 s FFV1 FFV1 FFV2 @@ -203,7 +203,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.706s -user 0m0.637s +real 0m0.756s +user 0m0.632s sys 0m0.065s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 2bef77ca41..6210200f50 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005599498748779297  +DEBUG: model prefixing takes 0.005722522735595703  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,8 +193,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.106 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s +Wrote files for 10 helas calls in 0.109 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines @@ -203,7 +203,7 @@ ALOHA: aloha creates 2 routines in 0.154 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.138 s +ALOHA: aloha creates 4 routines in 0.139 s VVV1 FFV1 FFV1 @@ -239,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.767s -user 0m1.542s -sys 0m0.218s +real 0m1.779s +user 0m1.579s +sys 0m0.196s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 0e530e2a27..c8f652402b 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057544708251953125  +DEBUG: model prefixing takes 0.0057947635650634766  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -199,6 +199,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s quit real 0m0.555s -user 0m0.507s -sys 0m0.045s -Code generation completed in 0 seconds +user 0m0.490s +sys 0m0.062s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 213d94caf7..20014f621c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005837678909301758  +DEBUG: model prefixing takes 0.005769014358520508  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -188,7 +188,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -205,7 +205,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -220,15 +220,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.046 s -Wrote files for 46 helas calls in 0.262 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.045 s +Wrote files for 46 helas calls in 0.257 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.352 s +ALOHA: aloha creates 5 routines in 0.345 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -236,7 +236,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.336 s +ALOHA: aloha creates 10 routines in 0.328 s VVV1 VVV1 FFV1 @@ -285,9 +285,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.470s -user 0m2.209s -sys 0m0.245s +real 0m2.399s +user 0m2.169s +sys 0m0.231s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 7ddb1403fe..fb93c2a74e 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005815982818603516  +DEBUG: model prefixing takes 0.0058596134185791016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s -Wrote files for 36 helas calls in 0.156 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s +Wrote files for 36 helas calls in 0.158 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.345 s +ALOHA: aloha creates 5 routines in 0.342 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -254,9 +254,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.267s -user 0m2.045s -sys 0m0.224s +real 0m2.268s +user 0m2.048s +sys 0m0.221s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 0bba3094b0..0b2e5c2a4c 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005731821060180664  +DEBUG: model prefixing takes 0.005775928497314453  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.340 s +ALOHA: aloha creates 5 routines in 0.343 s VVV1 VVV1 FFV1 @@ -206,7 +206,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m0.811s -user 0m0.755s -sys 0m0.052s -Code generation completed in 1 seconds +real 0m0.818s +user 0m0.763s +sys 0m0.051s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 615cb5fda7..b0eb4bc117 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005845069885253906  +DEBUG: model prefixing takes 0.005640506744384766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.169 s +1 processes with 123 diagrams generated in 0.166 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.453 s -Wrote files for 222 helas calls in 0.744 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.449 s +Wrote files for 222 helas calls in 0.729 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.357 s +ALOHA: aloha creates 5 routines in 0.349 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -257,9 +257,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.464s -user 0m3.220s -sys 0m0.237s +real 0m3.426s +user 0m3.165s +sys 0m0.251s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 994a65898e..01e2e1e58f 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005870342254638672  +DEBUG: model prefixing takes 0.005846738815307617  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.167 s +1 processes with 123 diagrams generated in 0.165 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.454 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.447 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.346 s +ALOHA: aloha creates 5 routines in 0.332 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.541s -user 0m1.470s -sys 0m0.063s -Code generation completed in 1 seconds +real 0m1.503s +user 0m1.439s +sys 0m0.058s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 83bfdf174d..d01addebe9 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005798816680908203  +DEBUG: model prefixing takes 0.0058705806732177734  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.969 s +1 processes with 1240 diagrams generated in 1.979 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -180,7 +180,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,15 +195,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.936 s -Wrote files for 2281 helas calls in 19.465 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.941 s +Wrote files for 2281 helas calls in 19.565 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.334 s +ALOHA: aloha creates 5 routines in 0.335 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -211,7 +211,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.328 s +ALOHA: aloha creates 10 routines in 0.330 s VVV1 VVV1 FFV1 @@ -259,9 +259,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m30.712s -user 0m30.183s -sys 0m0.428s +real 0m30.825s +user 0m30.307s +sys 0m0.411s Code generation completed in 31 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index c96b19803e..f69ae47bf3 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005761861801147461  +DEBUG: model prefixing takes 0.005766630172729492  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.975 s +1 processes with 1240 diagrams generated in 1.995 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.951 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.886 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.366 s +ALOHA: aloha creates 5 routines in 0.368 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. quit -real 0m13.758s -user 0m13.574s -sys 0m0.124s -Code generation completed in 14 seconds +real 0m13.671s +user 0m13.504s +sys 0m0.117s +Code generation completed in 13 seconds diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 4bd29bfc4f..b5ebeb4b75 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005829811096191406  +DEBUG: model prefixing takes 0.0058400630950927734  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.082 s +8 processes with 40 diagrams generated in 0.084 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -201,7 +201,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -218,7 +218,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -234,16 +234,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.033 s -Wrote files for 32 helas calls in 0.231 s +Wrote files for 32 helas calls in 0.234 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.152 s +ALOHA: aloha creates 2 routines in 0.155 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.138 s +ALOHA: aloha creates 4 routines in 0.141 s FFV1 FFV1 FFV1 @@ -296,9 +296,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.004s -user 0m1.777s -sys 0m0.228s +real 0m2.038s +user 0m1.818s +sys 0m0.212s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index f85f286cbd..71494815bb 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005831241607666016  +DEBUG: model prefixing takes 0.005812883377075195  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.081 s +8 processes with 40 diagrams generated in 0.084 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -215,7 +215,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.151 s +ALOHA: aloha creates 2 routines in 0.153 s FFV1 FFV1 FFV1 @@ -231,7 +231,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m0.673s -user 0m0.617s -sys 0m0.052s +real 0m0.686s +user 0m0.624s +sys 0m0.058s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 4d98c8255b..8cee1a79cc 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -150,7 +150,7 @@ INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Creating files in directory P1_gg_bbx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -166,20 +166,20 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Finding symmetric diagrams for subprocess group gg_bbx Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s -Wrote files for 12 helas calls in 0.110 s +Wrote files for 12 helas calls in 0.111 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.277 s +ALOHA: aloha creates 4 routines in 0.276 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.262 s +ALOHA: aloha creates 8 routines in 0.261 s VVS3 VVV1 FFV1 @@ -217,9 +217,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.013s -user 0m1.774s -sys 0m0.241s +real 0m2.011s +user 0m1.802s +sys 0m0.211s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 6201e51594..69fbe369c1 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -174,7 +174,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
quit -real 0m0.665s -user 0m0.608s -sys 0m0.053s -Code generation completed in 1 seconds +real 0m0.670s +user 0m0.604s +sys 0m0.062s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index e3fed5eb5f..b0fc10707c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005815029144287109  +DEBUG: model prefixing takes 0.005956411361694336  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.031 s +5 processes with 7 diagrams generated in 0.032 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.143 s +13 processes with 76 diagrams generated in 0.147 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. 
@@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.940 s +65 processes with 1119 diagrams generated in 1.975 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -500,7 +500,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -517,7 +517,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -534,7 +534,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -551,7 +551,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -568,7 +568,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -585,7 +585,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -602,7 +602,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -619,7 +619,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -636,7 +636,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -653,7 +653,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -670,7 +670,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -687,7 +687,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -704,7 +704,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -721,7 +721,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -738,7 +738,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -755,7 +755,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -772,7 +772,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -789,7 +789,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -804,15 +804,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.359 s -Wrote files for 810 helas calls in 3.440 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.374 s +Wrote files for 810 helas calls in 3.456 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.356 s +ALOHA: aloha creates 5 routines in 0.355 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -820,7 +820,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.331 s +ALOHA: aloha creates 10 routines in 0.327 s VVV1 VVV1 FFV1 @@ -1030,10 +1030,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m10.453s -user 0m8.760s -sys 0m0.481s -Code generation completed in 10 seconds +real 0m9.344s +user 0m8.848s +sys 0m0.461s +Code generation completed in 9 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index a8fd34b0cf..263e86554b 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14514756202697754  +DEBUG: model prefixing takes 0.1473391056060791  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.882 s +1 processes with 72 diagrams generated in 3.918 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +115,7 @@ INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 INFO: Creating files in directory P1_gg_ttxttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -130,7 +130,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx -Generated helas calls for 1 subprocesses (72 diagrams) in 0.198 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.196 s Wrote files for 119 helas calls in 0.436 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -138,7 +138,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.335 s +ALOHA: aloha creates 5 routines in 0.340 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -146,7 +146,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.346 s +ALOHA: aloha creates 10 routines in 0.353 s VVV5 VVV5 FFV1 @@ -191,9 +191,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.130s -user 0m6.861s -sys 0m0.249s +real 0m7.185s +user 0m6.891s +sys 0m0.275s Code generation completed in 7 seconds ************************************************************ * * diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index f5a4be27a2..28afb306f5 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14623308181762695  +DEBUG: model prefixing takes 0.14701032638549805  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.900 s +1 processes with 72 diagrams generated in 3.917 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -123,7 +123,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.333 s +ALOHA: aloha creates 5 routines in 0.338 s VVV5 VVV5 FFV1 @@ -143,7 +143,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
quit -real 0m5.362s -user 0m5.249s -sys 0m0.090s +real 0m5.392s +user 0m5.280s +sys 0m0.075s Code generation completed in 5 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index a1beb5bbec..ecf78249d6 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.129 s +1 processes with 6 diagrams generated in 0.130 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -593,18 +593,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.115 s +Wrote files for 16 helas calls in 0.116 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.192 s +ALOHA: aloha creates 3 routines in 0.195 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.189 s +ALOHA: aloha creates 6 routines in 0.192 s VVV1 VSS1 VSS1 @@ -645,10 +645,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m4.265s -user 0m2.653s -sys 0m0.247s -Code generation completed in 4 seconds +real 0m2.894s +user 0m2.641s +sys 0m0.253s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 12d38d134f..dbbb4dc1d8 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -599,7 +599,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
quit -real 0m1.397s -user 0m1.319s -sys 0m0.072s +real 0m1.404s +user 0m1.322s +sys 0m0.076s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 92da114f35..a815e2ae7a 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.124 s +1 processes with 3 diagrams generated in 0.123 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -597,12 +597,12 @@ Wrote files for 10 helas calls in 0.108 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.144 s +ALOHA: aloha creates 2 routines in 0.143 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.141 s +ALOHA: aloha creates 4 routines in 0.139 s VVV1 FFV1 FFV1 @@ -638,9 +638,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.762s -user 0m2.516s -sys 0m0.245s +real 0m2.770s +user 0m2.520s +sys 0m0.249s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 98681b7916..0f5cabf3db 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.124 s +1 processes with 3 diagrams generated in 0.136 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -582,7 +582,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.144 s +ALOHA: aloha creates 2 routines in 0.423 s VVV1 FFV1 FFV1 @@ -597,7 +597,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. quit -real 0m1.344s -user 0m1.272s -sys 0m0.066s -Code generation completed in 1 seconds +real 0m1.640s +user 0m1.275s +sys 0m0.067s +Code generation completed in 2 seconds From dac61e30c9ae9998c6cfa49aa371a2361b130614 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 21 May 2024 23:25:29 +0200 Subject: [PATCH 48/48] [jtmk2] regenerate all processes = no change except for codegen logs --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 16 +++--- .../CODEGEN_cudacpp_ee_mumu_log.txt | 12 ++-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 14 ++--- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 12 ++-- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 24 ++++---- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 18 +++--- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 8 +-- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +++---- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 8 +-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 +++---- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 ++--- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt 
| 22 ++++---- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 10 ++-- .../CODEGEN_mad_heft_gg_bb_log.txt | 14 ++--- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 10 ++-- .../CODEGEN_mad_pp_tt012j_log.txt | 56 +++++++++---------- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 20 +++---- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 16 +++--- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 18 +++--- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 12 ++-- .../CODEGEN_mad_susy_gg_tt_log.txt | 16 +++--- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 14 ++--- 22 files changed, 187 insertions(+), 187 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index c36133a43d..f08289bad0 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005721330642700195  +DEBUG: model prefixing takes 0.0057582855224609375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,7 +177,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,19 +194,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.102 s +Wrote files for 8 helas calls in 0.103 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.205 s +ALOHA: aloha creates 3 routines in 0.209 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.263 s +ALOHA: aloha creates 7 routines in 0.272 s FFV1 FFV1 FFV2 @@ -250,9 +250,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.183s -user 0m1.677s -sys 0m0.231s +real 0m1.930s +user 0m1.716s +sys 0m0.204s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 9f78c9e91d..f845f639cc 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005715131759643555  +DEBUG: model prefixing takes 0.00582575798034668  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.004 s +1 processes with 2 diagrams generated in 0.005 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Load PLUGIN.CUDACPP_OUTPUT @@ -184,7 +184,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.275 s +ALOHA: aloha creates 4 routines in 0.287 s FFV1 FFV1 FFV2 @@ -203,7 +203,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.667s -user 0m0.613s -sys 0m0.050s +real 0m0.836s +user 0m0.670s +sys 0m0.059s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 58b78d00c9..bcd13f1f43 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005684614181518555  +DEBUG: model prefixing takes 0.005464315414428711  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -198,12 +198,12 @@ Wrote files for 10 helas calls in 0.105 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.151 s +ALOHA: aloha creates 2 routines in 0.153 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.137 s +ALOHA: aloha creates 4 routines in 0.142 s VVV1 FFV1 FFV1 @@ -239,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.020s -user 0m1.542s -sys 0m0.197s +real 0m1.771s +user 0m1.550s +sys 0m0.221s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index fa79b1b42c..74599408a5 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005660295486450195  +DEBUG: model prefixing takes 0.005803108215332031  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -183,7 +183,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.161 s +ALOHA: aloha creates 2 routines in 0.149 s VVV1 FFV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
quit -real 0m0.780s -user 0m0.489s -sys 0m0.054s -Code generation completed in 1 seconds +real 0m0.703s +user 0m0.492s +sys 0m0.049s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 3c3ef65898..e0bc256894 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005795478820800781  +DEBUG: model prefixing takes 0.005784273147583008  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams add process g g > t t~ g INFO: Checking for minimal orders which gives processes. @@ -163,7 +163,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.020 s +1 processes with 16 diagrams generated in 0.021 s Total: 2 processes with 19 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -188,7 +188,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -205,7 +205,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -220,15 +220,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s -Wrote files for 46 helas calls in 0.253 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.045 s +Wrote files for 46 helas calls in 0.279 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.344 s +ALOHA: aloha creates 5 routines in 0.338 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -236,7 +236,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.328 s +ALOHA: aloha creates 10 routines in 0.323 s VVV1 VVV1 FFV1 @@ -285,9 +285,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.376s -user 0m2.126s -sys 0m0.249s +real 0m2.389s +user 0m2.141s +sys 0m0.224s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index d2f40f6c13..d226034616 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005821704864501953  +DEBUG: model prefixing takes 0.005830287933349609  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s -Wrote files for 36 helas calls in 0.158 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Wrote files for 36 helas calls in 0.156 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.342 s +ALOHA: aloha creates 5 routines in 0.344 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.323 s +ALOHA: aloha creates 10 routines in 0.328 s VVV1 VVV1 FFV1 @@ -254,9 +254,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.436s -user 0m2.028s -sys 0m0.221s +real 0m2.256s +user 0m2.049s +sys 0m0.205s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index de84b379b4..e94069458a 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005735158920288086  +DEBUG: model prefixing takes 0.005761384963989258  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.339 s +ALOHA: aloha creates 5 routines in 0.336 s VVV1 VVV1 FFV1 @@ -206,7 +206,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m1.163s -user 0m0.744s +real 0m0.797s +user 0m0.737s sys 0m0.056s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index b30b3f9586..2ce03bcd21 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005646944046020508  +DEBUG: model prefixing takes 0.005746126174926758  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.166 s +1 processes with 123 diagrams generated in 0.164 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.449 s -Wrote files for 222 helas calls in 0.721 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.452 s +Wrote files for 222 helas calls in 0.720 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.345 s +ALOHA: aloha creates 5 routines in 0.343 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -257,10 +257,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.460s -user 0m3.145s -sys 0m0.235s -Code generation completed in 3 seconds +real 0m3.664s +user 0m3.149s +sys 0m0.231s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 91c34f2ddf..88a6d07d2d 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.334 s +ALOHA: aloha creates 5 routines in 0.333 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. quit -real 0m1.548s -user 0m1.422s -sys 0m0.066s +real 0m1.630s +user 0m1.428s +sys 0m0.058s Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index ebb3d7f3f9..2588d0cd2e 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005715847015380859  +DEBUG: model prefixing takes 0.005673885345458984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.959 s +1 processes with 1240 diagrams generated in 1.945 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -180,7 +180,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,15 +195,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.869 s -Wrote files for 2281 helas calls in 19.196 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.855 s +Wrote files for 2281 helas calls in 19.401 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.330 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -211,7 +211,7 @@ ALOHA: aloha creates FFV1 routines 
ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.323 s +ALOHA: aloha creates 10 routines in 0.322 s VVV1 VVV1 FFV1 @@ -259,9 +259,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m30.424s -user 0m29.841s -sys 0m0.390s +real 0m30.499s +user 0m29.983s +sys 0m0.419s Code generation completed in 31 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 102c039911..6b5074a2c1 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005713939666748047  +DEBUG: model prefixing takes 0.005708456039428711  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.956 s +1 processes with 1240 diagrams generated in 1.997 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.867 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.860 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.368 s +ALOHA: aloha creates 5 routines in 0.365 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m14.024s -user 0m13.379s -sys 0m0.108s +real 0m13.606s +user 0m13.435s +sys 0m0.117s Code generation completed in 14 seconds diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 39a6a70bdb..60a7e04bfe 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005742073059082031  +DEBUG: model prefixing takes 0.005821704864501953  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.081 s +8 processes with 40 diagrams generated in 0.083 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -201,7 +201,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -218,7 +218,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -233,17 +233,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s -Wrote files for 32 helas calls in 0.228 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.033 s +Wrote files for 32 helas calls in 0.236 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.152 s +ALOHA: aloha creates 2 routines in 0.156 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.139 s +ALOHA: aloha creates 4 routines in 0.141 s FFV1 FFV1 FFV1 @@ -296,9 +296,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.997s -user 0m1.767s -sys 0m0.219s +real 0m2.028s +user 0m1.809s +sys 0m0.218s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 87d22b379f..c9b53c9d92 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056705474853515625  +DEBUG: model prefixing takes 0.0056972503662109375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -215,7 +215,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.149 s +ALOHA: aloha creates 2 routines in 0.151 s FFV1 FFV1 FFV1 @@ -231,7 +231,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
quit -real 0m0.666s -user 0m0.617s -sys 0m0.044s +real 0m0.671s +user 0m0.612s +sys 0m0.054s Code generation completed in 0 seconds diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 451b95c9d8..703d24d998 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -150,7 +150,7 @@ INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Creating files in directory P1_gg_bbx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -166,20 +166,20 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Finding symmetric diagrams for subprocess group gg_bbx Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s -Wrote files for 12 helas calls in 0.109 s +Wrote files for 12 helas calls in 0.110 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.272 s +ALOHA: aloha creates 4 routines in 0.277 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.255 s +ALOHA: aloha creates 8 routines in 0.262 s VVS3 VVV1 FFV1 @@ -217,9 +217,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.056s -user 0m1.780s -sys 0m0.210s +real 0m2.061s +user 0m1.786s +sys 0m0.232s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 6fcf07675b..599dc14f9e 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -66,7 +66,7 @@ INFO: load particles INFO: load vertices WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. 
Automatic computation of scale uncertainty can be wrong for such model.  -DEBUG: model prefixing takes 0.005918025970458984  +DEBUG: model prefixing takes 0.0062215328216552734  INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -162,7 +162,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.273 s +ALOHA: aloha creates 4 routines in 0.274 s VVS3 VVV1 FFV1 @@ -179,7 +179,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m0.707s -user 0m0.609s -sys 0m0.063s +real 0m0.684s +user 0m0.625s +sys 0m0.055s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 5a9d08cd3b..cc5cbed2bb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005757570266723633  +DEBUG: model prefixing takes 0.005604982376098633  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.143 s +13 processes with 76 diagrams generated in 0.142 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.919 s +65 processes with 1119 diagrams generated in 1.906 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -500,7 +500,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -517,7 +517,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -534,7 +534,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -551,7 +551,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -568,7 +568,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -585,7 +585,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -602,7 +602,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -619,7 +619,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -636,7 +636,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -653,7 +653,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -670,7 +670,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -687,7 +687,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -704,7 +704,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -721,7 +721,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -738,7 +738,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -755,7 +755,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -772,7 +772,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -789,7 +789,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -804,15 +804,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.344 s -Wrote files for 810 helas calls in 3.818 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.352 s +Wrote files for 810 helas calls in 3.380 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.347 s +ALOHA: aloha creates 5 routines in 0.352 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -820,7 +820,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.324 s +ALOHA: aloha creates 10 routines in 0.325 s VVV1 VVV1 FFV1 @@ -1030,9 +1030,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this 
process. quit -real 0m9.573s -user 0m8.691s -sys 0m0.428s +real 0m9.708s +user 0m8.687s +sys 0m0.423s Code generation completed in 10 seconds ************************************************************ * * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index fc3bde1247..70288a15e0 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.1441042423248291  +DEBUG: model prefixing takes 0.14343023300170898  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.863 s +1 processes with 72 diagrams generated in 3.871 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +115,7 @@ INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 INFO: Creating files in directory P1_gg_ttxttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -130,15 +130,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx -Generated helas calls for 1 subprocesses (72 diagrams) in 0.199 s -Wrote files for 119 helas calls in 0.434 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.195 s +Wrote files for 119 helas calls in 0.431 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.326 s +ALOHA: aloha creates 5 routines in 0.328 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -146,7 +146,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.340 s +ALOHA: aloha creates 10 routines in 0.342 s VVV5 VVV5 FFV1 @@ -191,9 +191,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.076s -user 0m6.791s -sys 0m0.267s +real 0m7.071s +user 0m6.803s +sys 0m0.249s Code generation completed in 7 seconds ************************************************************ * * diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 8a4609634c..ad5f437053 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14406657218933105  +DEBUG: model prefixing takes 0.14435124397277832  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.846 s +1 processes with 72 diagrams generated in 3.831 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +115,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.195 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -123,7 +123,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.335 s +ALOHA: aloha creates 5 routines in 0.327 s VVV5 VVV5 FFV1 @@ -143,7 +143,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m5.291s -user 0m5.196s -sys 0m0.076s -Code generation completed in 5 seconds +real 0m5.274s +user 0m5.174s +sys 0m0.072s +Code generation completed in 6 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 67e6cba085..2b28aa829b 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.126 s +1 processes with 6 diagrams generated in 0.131 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -592,19 +592,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x -Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.114 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s +Wrote files for 16 helas calls in 0.117 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.193 s +ALOHA: aloha creates 3 routines in 0.195 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 
routines in 0.188 s +ALOHA: aloha creates 6 routines in 0.192 s VVV1 VSS1 VSS1 @@ -645,9 +645,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.868s -user 0m2.619s -sys 0m0.249s +real 0m2.918s +user 0m2.669s +sys 0m0.245s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 9cf89e16b5..3531f3a8c6 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.127 s +1 processes with 6 diagrams generated in 0.130 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT @@ -583,7 +583,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.191 s +ALOHA: aloha creates 3 routines in 0.194 s VVV1 VSS1 VSS1 @@ -599,7 +599,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
quit -real 0m1.380s -user 0m1.311s -sys 0m0.062s -Code generation completed in 2 seconds +real 0m1.427s +user 0m1.344s +sys 0m0.074s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 0fd4004266..bfb96cff2a 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.122 s +1 processes with 3 diagrams generated in 0.126 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -593,16 +593,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.107 s +Wrote files for 10 helas calls in 0.110 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.145 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.137 s +ALOHA: aloha creates 4 routines in 0.141 s VVV1 FFV1 FFV1 @@ -638,9 +638,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.736s -user 0m2.505s -sys 0m0.229s +real 0m2.857s +user 0m2.538s +sys 0m0.270s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index eb1cace7ab..09a04f791a 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F import model MSSM_SLHA2 INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.9385032653808594  +DEBUG: model prefixing takes 0.9576148986816406  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. 
keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -557,7 +557,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.112 s +1 processes with 3 diagrams generated in 0.117 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -585,7 +585,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.145 s VVV1 FFV1 FFV1 @@ -600,7 +600,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. quit -real 0m2.443s -user 0m2.351s -sys 0m0.073s -Code generation completed in 2 seconds +real 0m2.489s +user 0m2.409s +sys 0m0.070s +Code generation completed in 3 seconds